library(readr)
library(tidyverse)
library(ggplot2)
library(stringr)
library(gridExtra)
library(ggpattern)
library(openxlsx)
library(ggpubr)
library(lme4)
library(ggsignif)
#### Taipei
# 17 subjects from Taipei including the younger group (4M,4F) ages from 20-35, while the older group (4M, 5F) from 50-65.
multmerge_n <- function(mypath){
filenames <- list.files(mypath, pattern = "^N_")
age <- c("O", "Y", "Y", "Y",
"O", "Y", "O", "Y", "O", "Y",
"Y", "O", "O", "O", "O", "Y", "O")
gender <- c("M", "M", "F", "M",
"M", "M", "F" ,"F", "M", "F",
"F", "F", "F", "F", "F", "M", "M")
datalist <- Map(function(x, y, z){
f <- read_csv(x, locale = locale(encoding = "utf8"))
f1 <- f %>%
rename(order = ...1) %>%
mutate(id = x, age = y, gender = z)
return(f1)}, filenames, age, gender)
Reduce(function(x,y) {bind_rows(x,y)}, datalist)
}
mycorpus_n <- multmerge_n("./")
nonchi_syll <- mycorpus_n$syll_c[str_detect(mycorpus_n$syll_c,"[a-zA-Z]")]
nonchi_word <- mycorpus_n$word_c[str_detect(mycorpus_n$word_c,"[a-zA-Z]")]
n_syll <- mycorpus_n %>%
select(syll_e, syll_c, id, age, gender, order) %>%
filter(!is.na(syll_e)) %>%
filter(!syll_c %in% nonchi_syll) %>%
mutate(area = "N")
n_word <- mycorpus_n %>%
select(word_e, word_c, id, age, gender, order) %>%
filter(!is.na(word_e)) %>%
filter(!word_c %in% nonchi_word) %>%
mutate(area = "N")
#### Kaohsiung
# 15 subjects from Kaohsiung, including the younger group (4M,4F) ages from 20-35, while the older group (4M, 3F) from 50-65.
multmerge_s <- function(mypath){
filenames <- list.files(mypath, pattern = "^S_")
age <- c("Y", "Y", "O", "Y", "Y",
"O", "Y", "O", "Y", "O",
"O", "Y", "O", "Y", "O")
gender <- c("F", "F", "M", "M", "M",
"F", "F", "F", "M", "M",
"F", "M", "M", "F", "M")
datalist <- Map(function(x, y, z){
f <- read_csv(x, locale = locale(encoding = "utf8"))
f1 <- f %>%
rename(order = ...1) %>%
mutate(id = x, age = y, gender = z)
return(f1)}, filenames, age, gender)
Reduce(function(x,y) {bind_rows(x,y)}, datalist)
}
mycorpus_s <- multmerge_s("./")
nonchi_syll <- mycorpus_s$syll_c[str_detect(mycorpus_s$syll_c,"[a-zA-Z]")]
nonchi_word <- mycorpus_s$word_c[str_detect(mycorpus_s$word_c,"[a-zA-Z]")]
s_syll <- mycorpus_s %>%
select(syll_e, syll_c, id, age, gender, order) %>%
filter(!is.na(syll_e)) %>%
filter(!syll_c %in% nonchi_syll) %>%
mutate(area = "S")
s_word <- mycorpus_s %>%
select(word_e, word_c, id, age, gender, order) %>%
filter(!is.na(word_e)) %>%
filter(!word_c %in% nonchi_word) %>%
mutate(area = "S")
## compile all the syllables and words
syll <- bind_rows(n_syll, s_syll)
word <- bind_rows(n_word, s_word)
#Data pre-processing
n_ai <- read_csv("data - N2.csv", col_types = cols(gender = col_character(),
stress = col_factor(),
n = col_character()),
locale = locale(encoding = "utf8"))
s_ai <- read_csv("data - S2.csv", col_types = cols(gender = col_character(),
stress = col_factor(),
n = col_character()),
locale = locale(encoding = "utf8"))
data <- bind_rows(n_ai, s_ai)
len <- read_csv("df2.csv")
## Rows: 31 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Filename, area
## dbl (1): length
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data5 <- right_join(data, len, by = c("Filename", "area"))
df5 <- read_csv("df5.csv", col_types = cols(n = col_character()))
data6 <- right_join(data5, df5, by = c("Filename", "n"))
blocks_even <- read_csv("df4.csv", col_types = cols(n = col_character()))
data7 <- right_join(data6, blocks_even, by = c("Filename", "n"))
#Data pre-processing
excluded <- c("truncated", "laughter", "N", "noise", "overlap", "o", "error", "deleted", "esch", "sche", "scha", "ae")
data1 <- data7 %>%
filter(dur <= 0.3) %>%
filter(!realization %in% excluded) %>%
mutate(realization = gsub("^ai$", "[aɪ]", realization)) %>%
mutate(realization = gsub("^ei$", "[eɪ]", realization)) %>%
mutate(realization = gsub("^e$", "[e]", realization)) %>%
mutate(realization = gsub("^a$", "[a]", realization)) %>%
mutate(realization = gsub("^sch$", "[É™]", realization)) %>%
mutate(realization = gsub("^esch$", "[eÉ™]", realization)) %>%
mutate(realization = gsub("^asch$", "[aÉ™]", realization)) %>%
mutate(realization = gsub("^ae$", "[aÉ›]", realization)) %>%
mutate(dm = ifelse(realization == "[aɪ]", "[aɪ]",
ifelse(realization %in% c("[eɪ]", "[aə]"), "D", "M"))) %>%
mutate(wl = gsub("^u_", "", place)) %>%
mutate(wl = gsub("_[a-z]+", "", wl)) %>%
mutate(context = gsub("^u_", "", place)) %>%
mutate(context = gsub("[a-z]+_[1-9]_", "", context)) %>%
mutate(con = ifelse(is.na(`1st`), "Head", ifelse(is.na(`3rd`), "Tail", "Middle"))) %>%
mutate(context = ifelse(context %in% c("zh", "ch", "sh", "r", "z", "c", "s", "i",
"y", "d", "t", "n", "l", "j", "q", "x"), "cor",
ifelse(context %in% c("b", "p", "m", "f", "w"), "lab",
ifelse(context %in% c("g", "k", "h", "o", "u", "e",
"a"), "dor", "else")))) %>%
mutate(onset = gsub("(ai|uai)[01234]", "", label)) %>%
mutate(onset1 = ifelse(onset %in% c("zh", "ch", "sh", "r", "z", "c", "s", "i",
"y", "d", "t", "n", "l", "j", "q", "x"), "cor",
ifelse(onset %in% c("b", "p", "m", "f", "w"), "lab",
ifelse(onset %in% c("g", "k", "h", "o", "u", "e",
"a", ""), "dor", "else")))) %>%
mutate(pos = gsub("[mbtqph]_1", "head", wl)) %>%
mutate(pos = gsub("[tqph]_2", "central", pos)) %>%
mutate(pos = gsub("[qph]_3", "central", pos)) %>%
mutate(pos = gsub("[ph]_4", "central", pos)) %>%
mutate(pos = gsub("h_3", "central", pos)) %>%
mutate(pos = gsub("h_5", "central", pos)) %>%
mutate(pos = gsub("b_2", "tail", pos)) %>%
mutate(pos = gsub("t_3", "tail", pos)) %>%
mutate(pos = gsub("q_4", "tail", pos)) %>%
mutate(pos = gsub("p_5", "tail", pos)) %>%
mutate(pos = gsub("h_6", "tail", pos))
###POS examination
toPOS <- data1 %>%
distinct(word)
#write_csv(toPOS, "toPOS.csv")
pos <- read_csv("POS1.csv", locale = locale(encoding = "utf-8"))
## Rows: 831 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): POS, word_c
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pos1 <- rename(pos, word = word_c)
C <- c("Caa", "Cbb")
ADV <- c("Da", "Dfa", "Dfb", "D", "Dk")
POST <- c("Cab", "Cba", "Neqb")
ASP <- c("Di")
N <- c("Na", "Nb", "Nc", "Nd", "Ncd", "Nh", "Ng")
DET <- c("Neu", "Nes", "Nep", "Neqa")
M <- c("Nf")
T1 <- c("I", "T", "DE")
Vi <- c("VA", "VB", "VH", "VI")
Vt <- c("VAC", "VC", "VCL", "VD", "VE", "VF", "VG", "VHC", "VJ", "VK", "VL", "SHI", "V_2")
for (i in seq(length(pos1$POS))) {
if (pos1$POS[i] %in% C) {
pos1$POS[i] <- "C"
} else if (pos1$POS[i] %in% ADV) {
pos1$POS[i] <- "ADV"
} else if (pos1$POS[i] %in% POST) {
pos1$POS[i] <- "else"
} else if (pos1$POS[i] %in% ASP) {
pos1$POS[i] <- "else"
} else if (pos1$POS[i] %in% N) {
pos1$POS[i] <- "N"
} else if (pos1$POS[i] %in% DET) {
pos1$POS[i] <- "DET"
} else if (pos1$POS[i] %in% M) {
pos1$POS[i] <- "M"
} else if (pos1$POS[i] %in% T1) {
pos1$POS[i] <- "T"
} else if (pos1$POS[i] %in% Vi) {
pos1$POS[i] <- "V"
} else if (pos1$POS[i] %in% Vt) {
pos1$POS[i] <- "V"
} else if (pos1$POS[i] == "COMMACATEGORY") {
pos1$POS[i] <- "else"
} else if (pos1$POS[i] == "PERIODCATEGORY") {
pos1$POS[i] <- "else"
}
}
pos2 <- pos1 %>%
mutate(POS = ifelse(POS %in% c("N", "V", "ADV", "A"), "content", "function"))
data2 <- right_join(data1, pos2, by = "word")
tidy <- read_csv("data - frequency.csv")
## Rows: 9957 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Word, Pinyin
## dbl (5): No, Rank, Frequency, Percent, Cumulation
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
tidy1 <- tidy %>%
filter(Pinyin %in% Pinyin[str_detect(Pinyin, "ai")]) %>%
mutate(No = seq(1:n()))
word_h <- tidy1 %>%
filter(Cumulation <= 50)
word_l <- tidy1 %>%
filter(Cumulation > 50)
data3 <- data2 %>%
mutate(ag = paste0(area, age, gender)) %>%
mutate(freq = ifelse(word %in% word_h$Word, "H", "L"))
data3$tone <- str_replace(data3$label, "[a-z]+", "")
data4 <- data3 %>%
filter(tone != "0") %>%
filter(stress != "0")
foc <- data4 %>%
group_by(Filename, word, freq) %>%
filter(order == min(order)) %>%
ungroup() %>%
mutate(giv = "foc")
rest <- data4 %>%
group_by(Filename, word, freq) %>%
filter(order != min(order)) %>%
ungroup() %>%
mutate(giv = "soc")
data3 <- bind_rows(foc, rest)
##number of tokens across groups and realizations
data3 %>%
group_by(realization) %>%
count() %>%
arrange(desc(n))
## # A tibble: 6 × 2
## # Groups: realization [6]
## realization n
## <chr> <int>
## 1 [aɪ] 4677
## 2 [e] 2795
## 3 [É™] 1023
## 4 [a] 506
## 5 [eɪ] 242
## 6 [aÉ™] 74
##overall reduction rate
data3 %>%
mutate(sum = length(Filename)) %>%
mutate(dm = ifelse(realization == "[aɪ]", "[aɪ]",
ifelse(realization %in% c("[eɪ]", "[aə]"), "D", "M"))) %>%
group_by(sum, dm) %>%
reframe(n = n(), p = n/sum*100) %>%
distinct()
## # A tibble: 3 × 4
## sum dm n p
## <int> <chr> <int> <dbl>
## 1 9317 D 316 3.39
## 2 9317 M 4324 46.4
## 3 9317 [aɪ] 4677 50.2
dm3 <- data3 %>%
mutate(stress = "all") %>%
mutate(order = ifelse(realization == "[aɪ]", 6,
ifelse(realization == "[eɪ]", 5,
ifelse(realization == "[aÉ™]", 4,
ifelse(realization == "[e]", 3,
ifelse(realization == "[a]", 2, 1)))))) %>%
group_by(realization, stress, order) %>%
summarise(sum = 9326, n = n(), p = 100*n/sum) %>%
ungroup() %>%
select(-sum, -n) %>%
mutate(realization = as.factor(realization)) %>%
arrange(realization) %>%
mutate(realization = fct_reorder(realization, order))
## `summarise()` has grouped output by 'realization', 'stress'. You can override
## using the `.groups` argument.
f_rea <- ggplot(dm3, aes(x= stress, y = p, fill = realization)) +
geom_bar(stat = "identity", position="stack", width = .6, color = "black", alpha = .9,
show.legend = FALSE) +
labs(x = "", y = "Percentage of number", fill = "") +
geom_text(aes(label = realization), size = 14, check_overlap = TRUE,
position = position_stack(vjust = 0.5)
) +
scale_y_continuous(expand = c(0,0), limit = c(0,100)) +
theme_bw() +
theme(plot.title = element_text(size = 26, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 26),
strip.text.x = element_text(size = 26),
legend.title = element_text(size = 26),
legend.text = element_text(size = 26),
legend.position = "top",
plot.margin = margin(1, 1, 1, 1, "cm"),
axis.title = element_text(size = 26),
axis.text.y=element_blank(),
axis.ticks.y=element_blank(),
panel.background=element_rect(colour="black", size = 1)) +
coord_flip() +
scale_fill_manual(
values = c("#9575cd", "#7986cb", "#729fed","#ffdf45", "#ffb74d", "#f8766d"),
limits = c("[a]", "[ə]","[e]", "[aə]", "[eɪ]", "[aɪ]"),
breaks = c("[a]", "[ə]","[e]", "[aə]", "[eɪ]", "[aɪ]"),
name = "Realization",
labels = c("[a]", "[ə]","[e]", "[aə]", "[eɪ]", "[aɪ]"),
)
## Warning: The `size` argument of `element_rect()` is deprecated as of ggplot2 3.4.0.
## ℹ Please use the `linewidth` argument instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
f_rea
#ggsave("f_rea.png", dpi = 600, width = 20, height = 5)
##POS reduction rate
s_r1 <- data3 %>%
group_by(POS, stress) %>%
mutate(count = n()) %>%
ungroup() %>%
mutate(order = ifelse(dm == "[aɪ]", 3,
ifelse(dm == "D", 2, 1))) %>%
mutate(POS = ifelse(POS == "function", "Function", "Content")) %>%
mutate(stress= ifelse(stress == 2, "S2", ifelse(stress == 3, "S3", "S1"))) %>%
mutate(dm = ifelse(dm == "M", "Monophthongs", ifelse(dm == "D", "Diphthongs", "Retained [aɪ]"))) %>%
mutate(stress = as.factor(stress)) %>%
group_by(dm, stress, POS) %>%
reframe(num = n(), p = 100*num/count) %>%
ungroup() %>%
distinct() %>%
filter(dm == "Retained [aɪ]")
f_dm1 <- ggplot(s_r1, aes(x= stress, y = p, fill = POS)) +
geom_bar(stat = "identity",
position = position_dodge(preserve = "single"),
width = .7, color = "black") +
labs(x = "Stress",
y = "Ratio of retained [aɪ]",
fill = "") +
geom_text(aes(label = round(p)), size = 5, check_overlap = FALSE,
position = position_dodge(width = .7), vjust = 4) +
scale_y_continuous(expand = c(0,0), limit = c(0,100)) +
theme_bw() +
scale_fill_manual(
values = c("#f8766d", "#729fed"),
limits = c("Content", "Function"),
breaks = c("Content", "Function"),
name = "Word Category",
labels = c("Content", "Function")
) +
theme(plot.title = element_text(size = 16, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 14),
strip.text.x = element_text(size = 16),
legend.title = element_text(size = 14),
legend.text = element_text(size = 14),
axis.title = element_text(size = 16),
#plot.margin = margin(0, 1, 0, 0, "cm"),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16))
f_dm1
#ggsave("f1_c.png", dpi = 600, width = 8, height = 5)
##freq reduction rate
s_r2 <- data3 %>%
filter(POS != "function") %>%
group_by(freq,
giv,
stress) %>%
mutate(count = n()) %>%
ungroup() %>%
mutate(order = ifelse(dm == "[aɪ]", 3,
ifelse(dm == "D", 2, 1))) %>%
filter(freq != "M") %>%
mutate(freq = ifelse(freq == "H", "High", "Low")) %>%
mutate(stress= ifelse(stress == 2, "S2", ifelse(stress == 3, "S3", "S1"))) %>%
mutate(giv = ifelse(giv == "foc", "First", "Second")) %>%
mutate(dm = ifelse(dm == "M", "Monophthongs", ifelse(dm == "D", "Diphthongs", "Retained [aɪ]"))) %>%
mutate(type = paste(freq, giv)) %>%
mutate(stress = as.factor(stress)) %>%
group_by(dm, stress, freq, giv, type) %>%
reframe(num = n(), p = 100*num/count) %>%
ungroup() %>%
distinct() %>%
filter(dm == "Retained [aɪ]")
f_dm2 <- ggplot(s_r2, aes(x= stress, y = p, fill = type)) +
geom_bar(stat = "identity",
position = position_dodge(preserve = "single"),
width = .8,
color = "black") +
labs(x = "Stress",
y = "Ratio of retained [aɪ]",
fill = "") +
geom_text(aes(label = round(p)), size = 5, check_overlap = FALSE,
position = position_dodge(width = .8), vjust = 5) +
scale_y_continuous(expand = c(0,0), limit = c(0,100)) +
theme_bw() +
scale_fill_manual(
values = c("light blue", "#729fed", "pink", "#f8766d"),
limits = c("High First", "High Second", "Low First", "Low Second"),
breaks = c("High First", "High Second", "Low First", "Low Second"),
name = "",
labels = c("High First", "High Second", "Low First", "Low Second")
) +
theme(plot.title = element_text(size = 16, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 16),
strip.text.x = element_text(size = 16),
legend.title = element_text(size = 14),
legend.text = element_text(size = 14),
axis.title = element_text(size = 16),
#plot.margin = margin(0, 1, 0, 0, "cm"),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16))
f_dm2
#Statistics
lr_f <- data3 %>%
#filter(giv != "soc") %>%
#filter(stress != 1) %>%
#filter(area == "S") %>%
#filter(gender == "F") %>%
#filter(age == "Y") %>%
#filter(POS != "function") %>%
#filter(pos != "central") %>%
#filter(freq != "M") %>%
mutate(reduce = ifelse(realization == "[aɪ]", "1", "0")) %>%
mutate(giv = as.factor(giv)) %>%
mutate(freq = as.factor(freq)) %>%
mutate(POS = as.factor(POS)) %>%
mutate(pos = as.factor(pos)) %>%
mutate(pos = as.factor(con)) %>%
mutate(stress = as.factor(stress)) %>%
mutate(gender = as.factor(gender)) %>%
mutate(area = as.factor(area)) %>%
mutate(age = as.factor(age)) %>%
mutate(reduce = as.factor(reduce))
#model_r <- glmer(reduce~giv*stress*freq + (1|Filename), data = lr_f, family = binomial)
model_r <- glmer(reduce~POS*stress + age*gender*area + (1|Filename), data = lr_f, family = binomial)
summary(model_r)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: reduce ~ POS * stress + age * gender * area + (1 | Filename)
## Data: lr_f
##
## AIC BIC logLik deviance df.resid
## 12055.0 12154.9 -6013.5 12027.0 9303
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6462 -0.9155 0.2743 0.8925 2.2536
##
## Random effects:
## Groups Name Variance Std.Dev.
## Filename (Intercept) 0.08698 0.2949
## Number of obs: 9317, groups: Filename, 31
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.45171 0.15975 2.828 0.00469 **
## POSfunction -0.70174 0.05545 -12.657 < 2e-16 ***
## stress1 -1.05288 0.07595 -13.862 < 2e-16 ***
## stress3 1.81288 0.16890 10.733 < 2e-16 ***
## ageY -0.27417 0.22410 -1.223 0.22118
## genderM -0.12992 0.22695 -0.572 0.56700
## areaS -0.14704 0.24865 -0.591 0.55428
## POSfunction:stress1 0.32709 0.14774 2.214 0.02683 *
## POSfunction:stress3 0.92445 0.55379 1.669 0.09506 .
## ageY:genderM 0.01590 0.31857 0.050 0.96021
## ageY:areaS 0.43517 0.33564 1.297 0.19479
## genderM:areaS 0.13400 0.33704 0.398 0.69094
## ageY:genderM:areaS -0.37914 0.46375 -0.818 0.41361
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 13 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
s_r <- foc %>%
group_by(freq, POS, age, gender, area, stress) %>%
count()
s_r1 <- foc %>%
select(-n) %>%
mutate(order = ifelse(dm == "[aɪ]", 3,
ifelse(dm == "D", 2, 1)))
dm1 <- right_join(s_r, s_r1, by = c("freq", "POS", "age", "gender", "area", "stress"))
dm <- dm1 %>%
filter(freq != "M") %>%
mutate(stress= ifelse(stress == 2, "S2", ifelse(stress == 3, "S3", "S1"))) %>%
mutate(dm = ifelse(dm == "M", "Monophthongs", ifelse(dm == "D", "Diphthongs", "Retained [aɪ]"))) %>%
mutate(stress = as.factor(stress)) %>%
group_by(dm, stress, freq, POS, area, gender, age, order) %>%
reframe(num = n(), p = 100*num/n) %>%
ungroup() %>%
distinct() %>%
select(-num) %>%
mutate(dm = as.factor(dm)) %>%
arrange(dm) %>%
mutate(dm = fct_reorder(dm, order))
dm2 <- dm %>%
filter(dm == "Retained [aɪ]") %>%
spread(stress, p) %>%
mutate(df12 = S2-S1, df23 = S3-S2)
lr_f <- dm2 %>%
filter(POS != "function") %>%
mutate(POS = as.factor(POS)) %>%
mutate(gender = as.factor(gender)) %>%
mutate(area = as.factor(area)) %>%
mutate(age = as.factor(age))
#model_r <- aov(dff~age*gender*area + Error(1/stress), data = lr_f)
model_r <- aov(df23~age*gender*area + Error(1/freq), data = lr_f)
summary(model_r)
##
## Error: Within
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 15.4 15.4 0.049 0.831
## gender 1 151.1 151.1 0.480 0.511
## area 1 74.2 74.2 0.235 0.642
## age:gender 1 8.7 8.7 0.028 0.872
## age:area 1 399.5 399.5 1.268 0.297
## gender:area 1 90.1 90.1 0.286 0.609
## age:gender:area 1 384.0 384.0 1.219 0.306
## Residuals 7 2204.8 315.0
s_r3 <- data3 %>%
filter(dm == "M") %>%
filter(stress != 3) %>%
filter(stress == 1) %>%
filter(POS == "content") %>%
group_by(age, gender) %>%
mutate(count = n()) %>%
ungroup() %>%
mutate(order2 = ifelse(realization == "[e]", 3,
ifelse(realization == "[a]", 2, 1))) %>%
mutate(POS = ifelse(POS == "function", "Function word", "Content word")) %>%
mutate(stress= ifelse(stress == 2, "S2", ifelse(stress == 3, "S3", "S1"))) %>%
mutate(area = ifelse(area == "N", "North", "South")) %>%
mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
mutate(age = ifelse(age == "O", "Old", "Young")) %>%
mutate(stress = as.factor(stress)) %>%
group_by(realization, age, gender,
order2) %>%
reframe(num = n(), p = 100*num/count) %>%
ungroup() %>%
distinct() %>%
mutate(realization = as.factor(realization)) %>%
arrange(realization) %>%
mutate(realization = fct_reorder(realization, order2))
f_dm <- ggplot(s_r3, aes(x= gender, y = p, fill = realization)) +
geom_bar(stat = "identity",
width = .5, color = "black") +
labs(x = "Gender",
y = "Ratio of monophthongized /aɪ/",
fill = "") +
geom_text(aes(label = round(p)), size = 6, check_overlap = TRUE,
position = position_stack(vjust = 0.5)
) +
scale_y_continuous(expand = c(0,0), limit = c(0,100.2)) +
theme_bw() +
theme(plot.title = element_text(size = 16, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 12),
strip.text.x = element_text(size = 16),
strip.text.y = element_text(size = 16),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14),
axis.title = element_text(size = 16),
plot.margin = margin(0, 1, 0, 0, "cm"),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16)) +
facet_grid(~age)
f_dm
#ggsave("f1mo_ag.png", dpi = 600, width = 8, height = 4)
s_r4 <- data3 %>%
filter(giv == "soc") %>%
filter(stress != 3) %>%
filter(dm == "M") %>%
filter(POS == "content") %>%
group_by(freq, stress) %>%
mutate(count = n()) %>%
ungroup() %>%
mutate(order2 = ifelse(realization == "[e]", 3,
ifelse(realization == "[a]", 2, 1))) %>%
mutate(POS = ifelse(POS == "function", "Function word", "Content word")) %>%
mutate(age = ifelse(age == "O", "Old", "Young")) %>%
mutate(area = ifelse(area == "N", "North", "South")) %>%
mutate(freq = ifelse(freq == "H", "High", "Low")) %>%
mutate(stress= ifelse(stress == 2, "S2", ifelse(stress == 3, "S3", "S1"))) %>%
mutate(stress = as.factor(stress)) %>%
group_by(realization, order2, stress, freq) %>%
reframe(num = n(), p = 100*num/count) %>%
ungroup() %>%
distinct() %>%
mutate(realization = as.factor(realization)) %>%
arrange(realization) %>%
mutate(realization = fct_reorder(realization, order2))
f_dm4 <- ggplot(s_r4, aes(x= stress, y = p, fill = realization)) +
geom_bar(stat = "identity",
width = .5, color = "black") +
labs(x = "Stress",
y = "Ratio of monophthongized /aɪ/",
fill = "") +
geom_text(aes(label = round(p)), size = 6, check_overlap = TRUE,
position = position_stack(vjust = 0.5)
) +
scale_y_continuous(expand = c(0,0), limit = c(0,100.2)) +
theme_bw() +
theme(plot.title = element_text(size = 16, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 12),
strip.text.x = element_text(size = 16),
strip.text.y = element_text(size = 16),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14),
axis.title = element_text(size = 16),
plot.margin = margin(0, 1, 0, 0, "cm"),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16)) +
facet_grid(~freq)
f_dm4
#ggsave("f1mo_old_freq.png", dpi = 600, width = 8, height = 4)
library(RVAideMemoire)
## Warning: package 'RVAideMemoire' was built under R version 4.3.1
## *** Package RVAideMemoire v 0.9-83-7 ***
##
## Attaching package: 'RVAideMemoire'
## The following object is masked from 'package:lme4':
##
## dummy
chi <- data3 %>%
filter(dm == "M") %>%
filter(POS == "function") %>%
filter(stress != 3) %>%
mutate(stress = ifelse(stress == "2", "S2", "S1"))
# Stress in cont and func
table1 <- table(chi$realization, chi$stress)
chisq.test(table1, correct=F)
##
## Pearson's Chi-squared test
##
## data: table1
## X-squared = 16.476, df = 2, p-value = 0.0002644
chisq.multcomp(table1, p.method = "holm")
##
## Pairwise comparisons using chi-squared tests
##
## data: table1
##
## 14 58 98 179 228
## 58 8.6e-07 - - - -
## 98 1.4e-14 0.0027 - - -
## 179 < 2e-16 2.3e-14 3.4e-06 - -
## 228 < 2e-16 < 2e-16 3.0e-12 0.0151 -
## 747 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##
## P value adjustment method: holm
table1
##
## S1 S2
## [a] 14 58
## [e] 179 747
## [É™] 98 228
#post hoc residuals
chisq.test(table1, correct=F)$stdres
##
## S1 S2
## [a] -0.5340425 0.5340425
## [e] -3.5496260 3.5496260
## [É™] 4.0590504 -4.0590504
sig <- .05
sigadj <- sig/(nrow(table1)*ncol(table1))
sigadj
## [1] 0.008333333
qnorm(sigadj/2)
## [1] -2.638257
chi2 <- data3 %>%
filter(dm == "M") %>%
filter(stress == "2")
# POS in S2 and S1
table2 <- table(chi2$realization, chi2$POS)
chisq.test(table2, correct=F)
##
## Pearson's Chi-squared test
##
## data: table2
## X-squared = 42.287, df = 2, p-value = 6.568e-10
chisq.multcomp(table2, p.method = "holm")
##
## Pairwise comparisons using chi-squared tests
##
## data: table2
##
## 58 228 302 460 747
## 228 < 2e-16 - - - -
## 302 < 2e-16 0.0013 - - -
## 460 < 2e-16 < 2e-16 2.1e-08 - -
## 747 < 2e-16 < 2e-16 < 2e-16 4.3e-16 -
## 1528 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##
## P value adjustment method: holm
table2
##
## content function
## [a] 302 58
## [e] 1528 747
## [É™] 460 228
#post hoc residuals
chisq.test(table2, correct=F)$stdres
##
## content function
## [a] 6.501107 -6.501107
## [e] -3.209054 3.209054
## [É™] -1.306640 1.306640
sig <- .05
sigadj <- sig/(nrow(table2)*ncol(table2))
sigadj
## [1] 0.008333333
qnorm(sigadj/2)
## [1] -2.638257
chi3 <- data3 %>%
filter(dm == "M") %>%
filter(giv == "soc") %>%
#filter(freq != "L") %>%
filter(stress != 3) %>%
filter(stress == 2) %>%
mutate(stress = ifelse(stress == "2", "S2", "S1")) %>%
filter(POS == "content")
#freq in S1, not in S2
#stress low: [e], high: all
#gender/age in S1: young more [a], male more [e], fewer schwa
#area in S1 low: south more schwa, fewer [e]
table3 <- table(chi3$realization, chi3$freq)
chisq.test(table3, correct=F)
##
## Pearson's Chi-squared test
##
## data: table3
## X-squared = 1.4724, df = 2, p-value = 0.4789
chisq.multcomp(table3, p.method = "holm")
##
## Pairwise comparisons using chi-squared tests
##
## data: table3
##
## 73 129 154 225 381
## 129 0.00024 - - - -
## 154 3.8e-07 0.13725 - - -
## 225 < 2e-16 1.3e-06 0.00053 - -
## 381 < 2e-16 < 2e-16 < 2e-16 1.4e-09 -
## 761 < 2e-16 < 2e-16 < 2e-16 < 2e-16 < 2e-16
##
## P value adjustment method: holm
table3
##
## H L
## [a] 154 73
## [e] 761 381
## [É™] 225 129
#post hoc residuals
chisq.test(table3, correct=F)$stdres
##
## H L
## [a] 0.5733416 -0.5733416
## [e] 0.5827610 -0.5827610
## [É™] -1.1618225 1.1618225
sig <- .05
sigadj <- sig/(nrow(table3)*ncol(table3))
sigadj
## [1] 0.008333333
qnorm(sigadj/2)
## [1] -2.638257
chi2 <- data3 %>%
filter(dm == "M") %>%
filter(stress == "2") %>%
filter(POS == "function")
# gender in S2 func
table2 <- table(chi2$realization, chi2$gender)
chisq.test(table2, correct=F)
##
## Pearson's Chi-squared test
##
## data: table2
## X-squared = 9.8681, df = 2, p-value = 0.007197
chi <- data3 %>%
filter(dm == "M") %>%
filter(realization != "[É™]") %>%
filter(POS == "function") %>%
filter(stress != 3) %>%
mutate(stress = ifelse(stress == "2", "S2", "S1"))
# Stress in cont, not func
table1 <- table(chi$realization, chi$stress)
chisq.test(table1, correct=F)
##
## Pearson's Chi-squared test
##
## data: table1
## X-squared = 0.00055649, df = 1, p-value = 0.9812
chi2 <- data3 %>%
filter(dm == "M") %>%
filter(realization != "[É™]") %>%
filter(stress == "2")
# POS in S2 and S1: [a]
table2 <- table(chi2$realization, chi2$POS)
chisq.test(table2, correct=F)
##
## Pearson's Chi-squared test
##
## data: table2
## X-squared = 40.973, df = 1, p-value = 1.543e-10
chi3 <- data3 %>%
filter(dm == "M") %>%
filter(realization != "[É™]") %>%
filter(giv == "soc") %>%
filter(POS == "content") %>%
filter(stress == "1")
#freq in S1, not in S2
#gender/age in S1
table3 <- table(chi3$realization, chi3$area)
chisq.test(table3, correct=F)
##
## Pearson's Chi-squared test
##
## data: table3
## X-squared = 2.5061, df = 1, p-value = 0.1134
re <- data3 %>%
mutate(block1 = as.factor(block1)) %>%
mutate(block_all = as.factor(block_all)) %>%
mutate(reduction = ifelse(realization == "[aɪ]", 1, 0)) %>%
group_by(area, age, gender, block_all,
#block1,
ag, Filename) %>%
summarise(mean = mean(reduction)*100) %>%
ungroup() %>%
group_by(area, age, gender, block_all,
#block1,
ag) %>%
summarise(n = n(), m = mean(mean), sd = sd(mean),
se = sd/sqrt(n), max = m+se
)
## `summarise()` has grouped output by 'area', 'age', 'gender', 'block_all', 'ag'.
## You can override using the `.groups` argument.
## `summarise()` has grouped output by 'area', 'age', 'gender', 'block_all'. You
## can override using the `.groups` argument.
f_re <- ggplot(re, aes(x= ag, y = m, fill = block_all)) +
geom_bar(stat = "identity",
position = position_dodge(preserve = "single"),
width = .7,
color = "black") +
#labs(x = "Realization", y = "Duration (ms)") +
geom_errorbar(aes(ymin = m, ymax = max),
position = position_dodge(width = .7, preserve = "single"),
width = .6) +
theme_bw() +
scale_y_continuous(expand = c(0,0), limit = c(0, 100))
f_re
# stats1
data_re <- data3 %>%
select(area, gender, age, realization, block_all, Filename) %>%
mutate(area = ifelse(area == "S", "1", "0")) %>%
mutate(gender = ifelse(gender == "F", "1", "0")) %>%
mutate(age = ifelse(age == "Y", "1", "0")) %>%
mutate(reduce = ifelse(realization == "[aɪ]", "1", "0")) %>%
mutate(reduce = as.factor(reduce)) %>%
mutate(block_all = as.factor(block_all))
model_re <- glmer(reduce~block_all*age*gender*area + (1|Filename), data = data_re, family = binomial)
summary(model_re)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: reduce ~ block_all * age * gender * area + (1 | Filename)
## Data: data_re
##
## AIC BIC logLik deviance df.resid
## 12729.2 12907.7 -6339.6 12679.2 9292
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.6188 -0.9531 0.6178 0.9662 1.4018
##
## Random effects:
## Groups Name Variance Std.Dev.
## Filename (Intercept) 0.07675 0.277
## Number of obs: 9317, groups: Filename, 31
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.26783 0.18740 1.429 0.1530
## block_all2 -0.16502 0.15219 -1.084 0.2782
## block_all3 -0.09582 0.18365 -0.522 0.6018
## age1 -0.49887 0.26057 -1.915 0.0556 .
## gender1 0.32408 0.26182 1.238 0.2158
## area1 -0.07399 0.26191 -0.283 0.7775
## block_all2:age1 0.04716 0.20716 0.228 0.8199
## block_all3:age1 0.19014 0.24122 0.788 0.4306
## block_all2:gender1 -0.06872 0.20795 -0.330 0.7411
## block_all3:gender1 -0.44025 0.24688 -1.783 0.0745 .
## age1:gender1 0.12015 0.36494 0.329 0.7420
## block_all2:area1 0.12339 0.21162 0.583 0.5598
## block_all3:area1 -0.04357 0.24623 -0.177 0.8595
## age1:area1 0.21204 0.36570 0.580 0.5620
## gender1:area1 -0.46709 0.39643 -1.178 0.2387
## block_all2:age1:gender1 -0.15009 0.28437 -0.528 0.5976
## block_all3:age1:gender1 -0.04881 0.33020 -0.148 0.8825
## block_all2:age1:area1 -0.25877 0.29183 -0.887 0.3752
## block_all3:age1:area1 -0.18459 0.33203 -0.556 0.5783
## block_all2:gender1:area1 0.33979 0.32357 1.050 0.2937
## block_all3:gender1:area1 0.16880 0.38126 0.443 0.6580
## age1:gender1:area1 0.11665 0.53773 0.217 0.8283
## block_all2:age1:gender1:area1 0.28099 0.42990 0.654 0.5134
## block_all3:age1:gender1:area1 0.48683 0.49828 0.977 0.3286
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 24 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
re1 <- data3 %>%
filter(block_30 != "m") %>%
mutate(block_30 = ifelse(block_30 == "f", "First", "Last")) %>%
mutate(gender = ifelse(gender == "M", "Male", "Female")) %>%
mutate(age = ifelse(age == "O", "Old", "Young")) %>%
mutate(reduction = ifelse(realization == "[aɪ]", 1, 0)) %>%
group_by(age,
gender,
block_30, Filename) %>%
summarise(mean = mean(reduction)*100) %>%
ungroup() %>%
group_by(age,
gender,
block_30) %>%
summarise(n = n(), m = mean(mean), sd = sd(mean),
se = sd/sqrt(n), max = m+se) %>%
ungroup() %>%
mutate(annotation = ifelse(gender == "Females", "***", "N.S."))
## `summarise()` has grouped output by 'age', 'gender', 'block_30'. You can
## override using the `.groups` argument.
## `summarise()` has grouped output by 'age', 'gender'. You can override using the
## `.groups` argument.
f_re1 <- ggplot(re1, aes(x= age, y = m, fill = block_30)) +
geom_bar(stat = "identity",
position = position_dodge(preserve = "single"),
width = .7,
color = "black") +
labs(x = "Age", y = "Ratio of retained [aɪ]", fill = "Block") +
geom_errorbar(aes(ymin = m, ymax = max),
position = position_dodge(width = .7, preserve = "single"),
width = .5) +
facet_grid(~gender) +
theme_bw() +
scale_y_continuous(expand = c(0,0), limit = c(0, 100)) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
legend.key.size = unit(0.3, 'cm'),
axis.title = element_text(size = 18),
#legend.position = c(0.9, 0.7),
panel.background=element_rect(colour="black", size = 1),
legend.background = element_rect(fill="white",
size=0.5, linetype="solid",
colour ="black"),
strip.background = element_rect(color = "black", size = 0.8),
axis.text.y = element_text(size = 16)) +
geom_signif(comparisons=list(c("Old", "Young")), annotations="*",
y_position = 90, tip_length = 0.1, vjust=0)
f_re1
#ggsave("f_block.png", dpi = 600, width = 7, height = 4)
data_re1 <- data3 %>%
select(area, gender, age, realization, dm, block_30, Filename) %>%
filter(block_30 != "m") %>%
#filter(block_30 == "l") %>%
#filter(gender == "F") %>%
#filter(area == "S") %>%
#filter(age == "O") %>%
mutate(block_30 = as.factor(block_30)) %>%
mutate(area = ifelse(area == "S", "1", "0")) %>%
mutate(gender = ifelse(gender == "F", "1", "0")) %>%
mutate(age = ifelse(age == "Y", "1", "0")) %>%
mutate(reduce = ifelse(realization == "[aɪ]", "1", "0")) %>%
#mutate(reduce = ifelse(dm == "M", "1", "0")) %>%
#mutate(stress = as.factor(stress)) %>%
mutate(reduce = as.factor(reduce))
model_re1 <- glmer(reduce~block_30*age*area*gender + (1|Filename), data = data_re1, family = binomial)
summary(model_re1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: reduce ~ block_30 * age * area * gender + (1 | Filename)
## Data: data_re1
##
## AIC BIC logLik deviance df.resid
## 7668.5 7781.2 -3817.3 7634.5 5572
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.5486 -0.9667 0.6457 0.9721 1.3293
##
## Random effects:
## Groups Name Variance Std.Dev.
## Filename (Intercept) 0.05973 0.2444
## Number of obs: 5589, groups: Filename, 31
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.227752 0.169654 1.342 0.1795
## block_30l 0.001884 0.166988 0.011 0.9910
## age1 -0.540987 0.234963 -2.302 0.0213 *
## area1 -0.094824 0.235304 -0.403 0.6870
## gender1 0.395732 0.235349 1.681 0.0927 .
## block_30l:age1 0.221749 0.219812 1.009 0.3131
## block_30l:area1 -0.057760 0.225090 -0.257 0.7975
## age1:area1 0.280087 0.327728 0.855 0.3928
## block_30l:gender1 -0.530990 0.222831 -2.383 0.0172 *
## age1:gender1 0.055426 0.326556 0.170 0.8652
## area1:gender1 -0.399094 0.354750 -1.125 0.2606
## block_30l:age1:area1 -0.290150 0.302721 -0.958 0.3378
## block_30l:age1:gender1 -0.065879 0.299509 -0.220 0.8259
## block_30l:area1:gender1 0.191273 0.343241 0.557 0.5774
## age1:area1:gender1 0.035786 0.480069 0.075 0.9406
## block_30l:age1:area1:gender1 0.615402 0.450333 1.367 0.1718
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 16 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
coefs <- data.frame(coef(summary(model_re1)))
coefs <- coefs %>%
mutate(Estimate = round(Estimate, 3)) %>%
mutate(Std..Error = round(Std..Error, 3)) %>%
mutate(z.value = round(z.value, 3)) %>%
mutate(Pr...z.. = round(Pr...z.., 4))
#write.csv(coefs, "table4.csv")
#realization and duration POS
sr4 <- data3 %>%
filter(dm != "D") %>%
group_by(gender, age, area, stress, realization, Filename, POS) %>%
mutate(tokens = n()) %>%
ungroup() %>%
mutate(order2 = ifelse(stress == "1", 1,
ifelse(stress == "2", 2,
ifelse(stress == "3", 3,
0)))) %>%
mutate(realization = ifelse(realization == "[aɪ]", "Retained /aɪ/", "Reduced /aɪ/")) %>%
mutate(order1 = ifelse(realization == "Retained /aɪ/", 1, 2)) %>%
mutate(ag = paste0(area, age, gender)) %>%
group_by(realization, stress, order2, order1, Filename, POS) %>%
summarise(m = 1000*mean(dur)) %>%
ungroup() %>%
group_by(realization, stress, order2, POS, order1
#ag
) %>%
summarise(n = n(), mean = mean(m), se = sd(m)/sqrt(n),
max = mean+se, min = mean-se) %>%
ungroup() %>%
distinct() %>%
mutate(realization = as.factor(realization)) %>%
arrange(realization) %>%
mutate(realization = fct_reorder(realization, order1)) %>%
mutate(stress = as.factor(stress)) %>%
arrange(stress) %>%
mutate(stress = fct_reorder(stress, order2))
## `summarise()` has grouped output by 'realization', 'stress', 'order2',
## 'order1', 'Filename'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'realization', 'stress', 'order2', 'POS'.
## You can override using the `.groups` argument.
f1_pos <- ggplot(sr4, aes(x= POS, y = mean, fill = stress)) +
geom_bar(stat = "identity",
position = position_dodge(preserve = "single"),
width = .7,
#fill = "gray",
color = "black") +
labs(x = "Word category", y = "Duration (ms)", fill = "Stress") +
# , subtitle = "n ≥ 10") +
geom_errorbar(aes(ymin = mean, ymax = max),
position = position_dodge(width = .7, preserve = "single"),
width = .4) +
theme_bw() +
facet_grid(~realization) +
scale_y_continuous(expand = c(0,0), limit = c(0, 200)) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 18),
legend.text = element_text(size = 14),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16)) +
scale_fill_manual(
values = c("#e63946", "#329cff", "#ffbf46"),
#values = c("white", "dark gray", "black"),
limits = c("1", "2", "3"),
breaks = c("1", "2", "3"),
labels = c("S1", "S2", "S3")
)
f1_pos
#ggsave("f2_c.png", dpi = 600, width = 10, height = 5)
sr6 <- data3 %>%
filter(stress == 3) %>%
filter(POS == "function") %>%
filter(realization == "[aɪ]") %>%
group_by(word) %>%
summarise(tokens = n(), m = 1000*mean(dur)) %>%
ungroup() %>%
filter(tokens > 2)
f1_word <- ggplot(sr6, aes(x= word, y = m)) +
geom_bar(stat = "identity",
width = .7,
color = "black") +
labs(x = "Word", y = "Duration (ms)") +
theme_bw() +
scale_y_continuous(expand = c(0,0), limit = c(0, 250)) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18, family = "Songti TC"),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 18),
legend.text = element_text(size = 14),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16))
f1_word
#realization and duration freq
sr2 <-data3 %>%
filter(POS != "function") %>%
#filter(dm != "D") %>%
group_by(gender, age, area, stress, realization, Filename, freq, giv) %>%
mutate(tokens = n()) %>%
ungroup() %>%
#mutate(check = ifelse(tokens < 10, "ignore", "count")) %>%
mutate(order1 = ifelse(realization == "[aɪ]", 6,
ifelse(realization == "[eɪ]", 5,
ifelse(realization == "[a]", 3,
ifelse(realization == "[e]", 4,
ifelse(realization == "[É™]", 2,
1)))))) %>%
mutate(order2 = ifelse(stress == "1", 1,
ifelse(stress == "2", 2,
ifelse(stress == "3", 3,
0)))) %>%
mutate(realization = ifelse(realization == "[aɪ]", "Retained /aɪ/", "Reduced /aɪ/")) %>%
mutate(order1 = ifelse(realization == "Retained /aɪ/", 1, 2)) %>%
mutate(freq = ifelse(freq == "H", "High", "Low")) %>%
mutate(giv = ifelse(giv == "foc", "First mention", "Second mention")) %>%
mutate(ag = paste0(area, age, gender)) %>%
group_by(realization, stress, order2, Filename, freq, giv, order1) %>%
summarise(m = 1000*mean(dur)) %>%
ungroup() %>%
group_by(realization, stress, order2, freq, giv, order1) %>%
summarise(n = n(), mean = mean(m), se = sd(m)/sqrt(n),
max = mean+se, min = mean-se) %>%
ungroup() %>%
#filter(realization != "reduced") %>%
distinct() %>%
mutate(realization = as.factor(realization)) %>%
arrange(realization) %>%
mutate(realization = fct_reorder(realization, order1)) %>%
mutate(stress = as.factor(stress)) %>%
arrange(stress) %>%
mutate(stress = fct_reorder(stress, order2))
## `summarise()` has grouped output by 'realization', 'stress', 'order2',
## 'Filename', 'freq', 'giv'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'realization', 'stress', 'order2', 'freq',
## 'giv'. You can override using the `.groups` argument.
f1_freq <- ggplot(sr2, aes(x= freq, y = mean, fill = stress)) +
geom_bar(stat = "identity",
position = position_dodge(preserve = "single"),
width = .7,
#fill = "gray",
color = "black") +
labs(x = "Word frequency", y = "Duration (ms)", fill = "Stress") +
# , subtitle = "n ≥ 10") +
geom_errorbar(aes(ymin = mean, ymax = max),
position = position_dodge(width = .7, preserve = "single"),
width = .4) +
theme_bw() +
facet_grid(realization~giv) +
scale_y_continuous(expand = c(0,0), limit = c(0, 200)) +
theme(plot.title = element_text(size = 16, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 16),
strip.text.x = element_text(size = 16),
strip.text.y = element_text(size = 16),
legend.title = element_text(size = 16),
legend.text = element_text(size = 14),
axis.title = element_text(size = 16),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16)) +
scale_fill_manual(
values = c("#e63946", "#329cff", "#ffbf46"),
limits = c("1", "2", "3"),
breaks = c("1", "2", "3"),
labels = c("S1", "S2", "S3")
)
f1_freq
#ggsave("f2freq.png", dpi = 600, width = 10, height = 7.5)
sr5 <- data3 %>%
filter(POS == "content") %>%
group_by(gender, age, area, stress, realization, Filename) %>%
mutate(tokens = n()) %>%
ungroup() %>%
mutate(order1 = ifelse(realization == "[aɪ]", 6,
ifelse(realization == "[eɪ]", 5,
ifelse(realization == "[a]", 3,
ifelse(realization == "[e]", 4,
ifelse(realization == "[É™]", 2,
1)))))) %>%
mutate(order2 = ifelse(stress == "1", 1,
ifelse(stress == "2", 2,
ifelse(stress == "3", 3,
0)))) %>%
mutate(realization = ifelse(realization == "[aɪ]", "[aɪ]", "reduced")) %>%
mutate(ag = paste0(area, age, gender)) %>%
group_by(realization, stress, order2, Filename, age, area) %>%
summarise(m = 1000*mean(dur)) %>%
ungroup() %>%
group_by(realization, stress, order2, age, area
#ag
) %>%
summarise(n = n(), mean = mean(m), se = sd(m)/sqrt(n),
max = mean+se, min = mean-se) %>%
ungroup() %>%
distinct() %>%
mutate(stress = as.factor(stress)) %>%
arrange(stress) %>%
mutate(stress = fct_reorder(stress, order2))
## `summarise()` has grouped output by 'realization', 'stress', 'order2',
## 'Filename', 'age'. You can override using the `.groups` argument.
## `summarise()` has grouped output by 'realization', 'stress', 'order2', 'age'.
## You can override using the `.groups` argument.
f1_age <- ggplot(sr5, aes(x= age, y = mean, fill = stress)) +
geom_bar(stat = "identity",
position = position_dodge(preserve = "single"),
width = .7,
#fill = "gray",
color = "black") +
labs(x = "Age", y = "Duration (ms)", fill = "Stress") +
# , subtitle = "n ≥ 10") +
geom_errorbar(aes(ymin = mean, ymax = max),
position = position_dodge(width = .7, preserve = "single"),
width = .4) +
theme_bw() +
facet_grid(area~realization) +
scale_y_continuous(expand = c(0,0), limit = c(0, 200)) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 18),
legend.text = element_text(size = 14),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
axis.text.y = element_text(size = 16)) +
scale_fill_manual(
values = c("#e63946", "#329cff", "#ffbf46"),
limits = c("1", "2", "3"),
breaks = c("1", "2", "3"),
labels = c("S1", "S2", "S3")
)
f1_age
#ggsave("f2_c.png", dpi = 600, width = 10, height = 5)
#statistics
ddd <- data3 %>%
#filter(dm != "D") %>%
#filter(giv != "soc") %>%
#filter(area == "S") %>%
#filter(gender == "F") %>%
#filter(age == "Y") %>%
#filter(POS !="function") %>%
#filter(freq != "H") %>%
#filter(stress == "1") %>%
filter(realization == "[aɪ]") %>%
mutate(realization = ifelse(realization == "[aɪ]", "0", "1")) %>%
#mutate(gender = ifelse(gender == "M", "1", "0")) %>%
#mutate(age = ifelse(age == "Y", "1", "0")) %>%
#mutate(stress = ifelse(stress == "3", "1", "0")) %>%
#mutate(area = ifelse(area == "S", "1", "0")) %>%
#filter(realization == "0") %>%
mutate(pos = ifelse(pos == "tail", "1", "0")) %>%
mutate(onset1 = ifelse(onset1 == "cor", "1", "0")) %>%
mutate(context = ifelse(context == "else", "1", "0")) %>%
mutate(freq = as.factor(freq)) %>%
mutate(pos = as.factor(pos)) %>%
mutate(POS = as.factor(POS)) %>%
mutate(context = as.factor(context)) %>%
mutate(realization = as.factor(realization)) %>%
mutate(Filename = as.factor(Filename)) %>%
mutate(age = as.factor(age)) %>%
mutate(area = as.factor(area)) %>%
mutate(gender = as.factor(gender)) %>%
mutate(stress = as.factor(stress))
#model_d <- lmer(dur~realization*giv*freq*stress + (1|Filename), data = ddd)
#model_d <- lmer(dur~realization*stress*POS+age*area*gender + (1|Filename), data = ddd)
model_d <- lmer(dur~stress*POS + (1|Filename), data = ddd)
#model_d <- lmer(dur~giv*stress+age*area*gender + (1|Filename), data = ddd)
coefs <- data.frame(coef(summary(model_d)))
coefs <- coefs %>%
mutate(Estimate = round(Estimate, 4)) %>%
mutate(Std..Error = round(Std..Error, 4)) %>%
mutate(t.value = round(t.value, 4)) %>%
#mutate(p.z = 2*(1-pnorm(abs(coefs$t.value))))
mutate(p.z = ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.001, "<.001",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.01, "<.01",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.05, "<.05",
round(2*(1-pnorm(abs(coefs$t.value))), 4)))))
coefs
## Estimate Std..Error t.value p.z
## (Intercept) 0.1359 0.0019 69.7309 <.001
## stress1 -0.0175 0.0026 -6.7569 <.001
## stress3 0.0258 0.0025 10.3718 <.001
## POSfunction 0.0030 0.0018 1.7209 0.0853
## stress1:POSfunction -0.0150 0.0053 -2.8315 <.01
## stress3:POSfunction 0.0174 0.0075 2.3115 <.05
#formant and stress [ai]
f_r <- data3 %>%
select(age, gender, stress, area, Filename, dur, realization, freq, onset, onset1, tone, giv, order,
context, wl, pos, con, POS, p0_1, p0_2, p1_1, p1_2, p2_1, p2_2, p3_1, p3_2,
p4_1, p4_2, p5_1, p5_2, p6_1, p6_2, p7_1, p7_2, p8_1, p8_2, p9_1, p9_2) %>%
mutate(order1 = ifelse(stress == "3", 4,
ifelse(stress == "2", 3,
ifelse(stress == "1", 2,
1))))
fr_1 <- f_r %>%
gather("time", "formant", -age, -gender, -tone, -giv, -order1,
-stress, -area, -Filename, -dur, -realization, -order, -freq, -onset, -onset1,
-context, -wl, -pos, -con, -POS) %>%
na.omit()
fr_1$t <- str_replace(fr_1$time, "_(1|2)", "")
fr_1$t <- str_replace(fr_1$t, "p", "")
fr_1$f <- str_replace(fr_1$time, "p(0|1|2|3|4|5|6|7|8|9)_", "")
fr_2 <- fr_1 %>%
select(-time) %>%
mutate(ag = paste0(area, age, gender)) %>%
mutate(stress = ifelse(stress == 2, "S2", ifelse(stress == 3, "S3", "S1"))) %>%
mutate(formant = as.numeric(formant)) %>%
mutate(age = ifelse(age == "O", "Old", "Young")) %>%
mutate(area = ifelse(area == "N", "North", "South")) %>%
mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
filter(realization == "[aɪ]") %>%
mutate(stress = as.factor(stress)) %>%
arrange(stress) %>%
mutate(stress = fct_reorder(stress, order1)) %>%
mutate(f = ifelse(f == 1, "F1", "F2")) %>%
spread(f, formant) %>%
filter(realization == "[aɪ]") %>%
group_by(Filename, t) %>%
filter(F1 %in% F1[abs(scale(F1)) <= 3], F2 %in% F2[abs(scale(F2)) <= 3]) %>%
ungroup() %>%
filter(t != 9) %>%
filter(t != 0)
ai_s <- fr_2 %>%
filter(freq != "M") %>%
filter(realization == "[aɪ]") %>%
group_by(Filename, t) %>%
filter(F1 %in% F1[abs(scale(F1)) <= 3], F2 %in% F2[abs(scale(F2)) <= 3]) %>%
ungroup() %>%
filter(t != 9) %>%
filter(t != 0) %>%
group_by(Filename, age, gender, area, ag) %>%
mutate(F1 = scale(F1), F2 = scale(F2)) %>%
ungroup()
## Lobanov
ggplot(ai_s, aes(x = F2, y = F1, color = stress)) +
geom_smooth(aes(linetype = stress, fill = stress)) +
theme_minimal() +
scale_x_reverse() +
scale_y_reverse() +
facet_grid(age~area)+
theme_bw() +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
scale_linetype_manual(labels = c("S1", "S2", "S3"),
values=c("dashed", "solid", "longdash")) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) +
ggtitle("Lobanov-transformed vowel plot")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
f2_f1 <- ggplot() +
geom_smooth(data = ai_s, aes(x=t, y=F1, linetype = stress,
group = stress, color = stress, fill = stress),
size = 2, se = T) +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
scale_linetype_manual(labels = c("S1", "S2", "S3"),
values=c("dashed", "solid", "longdash")) +
labs(x = "Normalized time", y = "Frequency (z-score)", linetype = "Stress", color = "Stress",
#title = "Female",
fill = "Stress") +
theme_bw() +
#facet_grid(~gender) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
f2_f2 <- ggplot() +
geom_smooth(data = ai_s, aes(x=t, y=F2, linetype = stress, group = stress,
color = stress, fill = stress),
size = 2, se = T) +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
scale_linetype_manual(labels = c("S1", "S2", "S3"),
values=c("dashed", "solid", "longdash")) +
labs(x = "Normalized time", y = "Frequency (z-score)", linetype = "Stress", color = "Stress",
#title = "Female",
fill = "Stress") +
#facet_grid(~gender)+
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
#ggsave("f3_z.png", dpi = 600, width = 10, height = 6)
f2_f1 <- ggplot() +
geom_smooth(data = ai_s, aes(x=t, y=F1, linetype = stress,
group = stress, color = stress, fill = stress),
size = 2, se = T) +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
scale_linetype_manual(labels = c("S1", "S2", "S3"),
values=c("dashed", "solid", "longdash")) +
labs(x = "Normalized time", y = "Frequency (z-score)", linetype = "Stress", color = "Stress",
fill = "Stress") +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
f2_f2 <- ggplot() +
geom_smooth(data = ai_s, aes(x=t, y=F2, linetype = stress, group = stress,
color = stress, fill = stress),
size = 2, se = T) +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
scale_linetype_manual(labels = c("S1", "S2", "S3"),
values=c("dashed", "solid", "longdash")) +
labs(x = "Normalized time", y = "Frequency (z-score)", linetype = "Stress", color = "Stress",
fill = "Stress") +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
a_f <- ai_s %>%
filter(t %in% c(1,2,3)) %>%
group_by(Filename, order) %>%
mutate(time = 1) %>%
mutate(F1_max = max(F1)) %>%
mutate(F2_min = min(F2)) %>%
#select(-t) %>%
distinct() %>%
ungroup()
i_f <- ai_s %>%
filter(t %in% c(6,7,8)) %>%
group_by(Filename, order) %>%
mutate(time = 2) %>%
mutate(F1_min = min(F1)) %>%
mutate(F2_max = max(F2)) %>%
#select(-t) %>%
distinct() %>%
ungroup()
ai_f1 <- bind_rows(a_f, i_f)
ex <- ai_f1 %>%
mutate(t = as.numeric(t)) %>%
filter(Filename == "XMC") %>%
filter(order == 42)
ex1 <- fr_2 %>%
#filter(stress == "S3") %>%
#filter(freq == "L") %>%
mutate(t = as.numeric(t)) %>%
filter(Filename == "XMC") %>%
filter(order == 42)
highlighted_points <- data.frame(x = c(3, 8), y = c(809.9949, 538.4187))
hp <- data.frame(x = c(1, 7), y = c(1737.924, 2104.313))
f1 <- ggplot() +
geom_point(data = ex1, aes(x = t, y = F1), color = "blue", size = 4) +
geom_line(data = ex1, aes(x = t, y = F1), size = 1, color = "blue") +
geom_rect(data = highlighted_points,
aes(xmin = x - 0.3, xmax = x + 0.3, ymin = y - 80, ymax = y + 80),
color = "dark red", fill = NA, size = 1) +
theme_bw() +
scale_x_continuous(breaks = ex1$t) +
labs(x = "Time", y = "Formant frequencies (Hz)") +
geom_point(data = ex1, aes(x = t, y = F2), color = "blue", size = 4) +
geom_line(data = ex1, aes(x = t, y = F2), size = 1, color = "blue") +
geom_rect(data = hp,
aes(xmin = x - 0.3, xmax = x + 0.3, ymin = y - 80, ymax = y + 80),
color = "dark red", fill = NA, size = 1) +
theme(plot.title = element_text(size = 16, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 14),
strip.text.x = element_text(size = 16),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 16),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 12))
f1
#ggsave("ex1.png", dpi = 600, width = 8, height = 6)
ai_s <- fr_2 %>%
#filter(giv != "foc") %>%
filter(POS == "content") %>%
mutate(ra = paste(area, age))
a_s <- ai_s %>%
filter(t %in% c(1,2,3)) %>%
group_by(Filename, order) %>%
mutate(time = 1) %>%
mutate(F1 = max(F1)) %>%
mutate(F2 = min(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
i_s <- ai_s %>%
filter(t %in% c(6,7,8)) %>%
group_by(Filename, order) %>%
mutate(time = 2) %>%
mutate(F1 = min(F1)) %>%
mutate(F2 = max(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
ai_s1 <- bind_rows(a_s, i_s)
aim_s <- ai_s1 %>%
group_by(time,
freq,
giv,
stress) %>%
reframe(F1 = mean(F1), F2 = mean(F2)) %>%
mutate(giv = ifelse(giv == "foc", "First-mention", "Second-mention")) %>%
rename(Frequency = freq) %>%
ungroup()
## Lobanov
ggplot(aim_s, aes(x = F2, y = F1, color = stress)) +
geom_point(aes(shape = Frequency), size = 4) +
geom_line(aes(linetype = Frequency), linewidth = .8) +
#geom_smooth(aes(linetype = stress, fill = stress)) +
theme_minimal() +
labs(x = "Normalized F2", y = "Normalized F1") +
scale_x_reverse() +
scale_y_reverse() +
facet_grid(~giv)+
theme_bw() +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) +
ggtitle("Lobanov-transformed vowel plot")
#ggsave("f3_freq.png", dpi = 600, width = 8, height = 5)
ai_c <- fr_2 %>%
#filter(giv != "foc") %>%
#filter(stress == "S2") %>%
mutate(ra = paste(area, age))
a_c <- ai_c %>%
filter(t %in% c(1,2,3)) %>%
group_by(Filename, order) %>%
mutate(time = 1) %>%
mutate(F1 = max(F1)) %>%
mutate(F2 = min(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
i_c <- ai_c %>%
filter(t %in% c(6,7,8)) %>%
group_by(Filename, order) %>%
mutate(time = 2) %>%
mutate(F1 = min(F1)) %>%
mutate(F2 = max(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
ai_c1 <- bind_rows(a_c, i_c)
aim_c <- ai_c1 %>%
group_by(time, stress,
POS) %>%
reframe(F1 = mean(F1), F2 = mean(F2)) %>%
ungroup()
## Lobanov
ggplot(aim_c, aes(x = F2, y = F1, color = stress)) +
geom_point(aes(shape = POS), size = 4) +
geom_line(aes(linetype = POS), linewidth = .8) +
#geom_smooth(aes(linetype = POS, fill = POS)) +
theme_minimal() +
labs(x = "Normalized F2", y = "Normalized F1") +
scale_x_reverse() +
scale_y_reverse() +
theme_bw() +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
scale_linetype_manual(labels = c("content", "function"),
values=c("solid", "longdash")) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) +
ggtitle("Lobanov-transformed vowel plot")
#ggsave("f3_c.png", dpi = 600, width = 8, height = 5)
f2_f1 <- ggplot() +
geom_smooth(data = ai_c, aes(x=t, y=F1, linetype = POS, group = POS, color = POS, fill = POS),
size = 2, se = T) +
scale_color_manual(labels = c("content", "function"),
values = c("#329cff", "#e63946")) +
scale_fill_manual(labels = c("content", "function"),
values = c("#329cff", "#e63946")) +
scale_linetype_manual(labels = c("content", "function"),
values=c("solid", "longdash")) +
labs(x = "Normalized time", y = "Frequency (z-score)", linetype = "Word category",
color = "Word category",
fill = "Word category") +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
f2_f2 <- ggplot() +
geom_smooth(data = ai_c, aes(x=t, y=F2, linetype = POS, group = POS, color = POS, fill = POS),
size = 2, se = T) +
scale_color_manual(labels = c("content", "function"),
values = c("#329cff", "#e63946")) +
scale_fill_manual(labels = c("content", "function"),
values = c("#329cff", "#e63946")) +
scale_linetype_manual(labels = c("content", "function"),
values=c("solid", "longdash")) +
labs(x = "", y = "Frequency (z-score)", linetype = "Word category",
color = "Word category",
fill = "Word category") +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
ai_f <- fr_2 %>%
filter(giv != "foc") %>%
filter(POS == "content") %>%
filter(stress == "S2") %>%
mutate(freq = ifelse(freq == "H", "High", "Low")) %>%
mutate(ra = paste(area, age))
a_f <- ai_f %>%
filter(t %in% c(1,2,3)) %>%
group_by(Filename, order) %>%
mutate(time = 1) %>%
mutate(F1 = max(F1)) %>%
mutate(F2 = min(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
i_f <- ai_f %>%
filter(t %in% c(6,7,8)) %>%
group_by(Filename, order) %>%
mutate(time = 2) %>%
mutate(F1 = min(F1)) %>%
mutate(F2 = max(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
ai_f1 <- bind_rows(a_f, i_f)
aim_f <- ai_f1 %>%
group_by(time, freq) %>%
reframe(F1 = mean(F1), F2 = mean(F2)) %>%
ungroup()
## Lobanov
ggplot(aim_f, aes(x = F2, y = F1, color = freq)) +
geom_point(aes(shape = freq)) +
geom_line(aes(linetype = freq)) +
theme_minimal() +
scale_x_reverse() +
scale_y_reverse() +
theme_bw() +
scale_color_manual(labels = c("High", "Low"),
values = c("green","#329cff")) +
scale_fill_manual(labels = c("High", "Low"),
values = c("green","#329cff")) +
scale_linetype_manual(labels = c("High", "Low"),
values=c("longdash", "solid")) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) +
ggtitle("Lobanov-transformed vowel plot")
#ggsave("f3_z.png", dpi = 600, width = 10, height = 6)
ai_st <- ai_c1 %>%
filter(giv == "foc") %>%
#filter(stress == "S1") %>%
mutate(stress = ifelse(stress == "S1", 2, ifelse(stress == "S2", 1, 3))) %>%
mutate(stress = as.factor(stress)) %>%
mutate(area = as.factor(area)) %>%
#filter(gender == "Female") %>%
#filter(age != "Old") %>%
#filter(area == "North") %>%
filter(POS != "function") %>%
filter(freq != "H") %>%
filter(time == 1) %>%
mutate(freq = as.factor(freq)) %>%
mutate(POS = as.factor(POS)) %>%
#mutate(time = as.numeric(time)) %>%
filter(realization == "[aɪ]")
#model_f <- lmer(F2~freq*t + (1 | Filename), data = ai_f1_t)
model_f <- lmer(F1~stress + age*gender*area + (1 | Filename), data = ai_st)
#model_f <- lmer(F1~POS*stress + age*gender*area + (1 | Filename), data = ai_st)
#model_f <- lmer(F1~area*gender*age + (1 | Filename), data = ai_st)
#model_f <- lmer(F1~stress + age*gender*area + (1 | Filename), data = ai_st)
#model_f <- lmer(F2~area*t + (1 | Filename), data = ai_st)
#model_f <- lmer(formant~stress*t + (1 | Filename), data = ai_f1_t)
coefs <- data.frame(coef(summary(model_f)))
coefs <- coefs %>%
mutate(Estimate = round(Estimate, 4)) %>%
mutate(Std..Error = round(Std..Error, 4)) %>%
mutate(t.value = round(t.value, 4)) %>%
mutate(p.z = round(2*(1-pnorm(abs(coefs$t.value))), 4)) %>%
mutate(p.z = ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.001, "<.001",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.01, "<.01",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.05, "<.05",
round(2*(1-pnorm(abs(coefs$t.value))), 4)))))
coefs
## Estimate Std..Error t.value p.z
## (Intercept) 909.2039 31.8001 28.5912 <.001
## stress2 -9.6881 10.3224 -0.9385 0.348
## stress3 49.6610 7.5243 6.6001 <.001
## ageYoung -60.2213 44.8608 -1.3424 0.1795
## genderMale -233.2704 44.9772 -5.1864 <.001
## areaSouth -40.1602 48.7771 -0.8233 0.4103
## ageYoung:genderMale 137.6632 63.5721 2.1655 <.05
## ageYoung:areaSouth 130.5093 66.3142 1.9680 <.05
## genderMale:areaSouth 93.7502 66.3456 1.4131 0.1576
## ageYoung:genderMale:areaSouth -213.4432 91.9026 -2.3225 <.05
#summary(model_f)
ai_age <- fr_2 %>%
filter(t != 9) %>%
filter(t != 0) %>%
group_by(Filename, t) %>%
filter(F1 %in% F1[abs(scale(F1)) <= 3], F2 %in% F2[abs(scale(F2)) <= 3]) %>%
ungroup() %>%
group_by(Filename, age, area, ag, t) %>%
mutate(F1 = scale(F1), F2 = scale(F2)) %>%
ungroup() %>%
#filter(giv == "foc") %>%
#filter(stress == "S2") %>%
filter(POS == "content") %>%
#filter(context == "else") %>%
#filter(onset1 == "dor") %>%
filter(realization == "[aɪ]") %>%
mutate(ra = paste(area, age))
a_age <- ai_age %>%
filter(t %in% c(1,2,3)) %>%
group_by(Filename, order) %>%
mutate(time = 1) %>%
mutate(F1 = max(F1)) %>%
mutate(F2 = min(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup() #%>%
# group_by(Filename) %>%
# mutate(num = seq(1,length(Filename))) %>%
# ungroup() %>%
# filter(num <= 20)
i_age <- ai_age %>%
filter(t %in% c(6,7,8)) %>%
group_by(Filename, order) %>%
mutate(time = 2) %>%
mutate(F1 = min(F1)) %>%
mutate(F2 = max(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup() #%>%
# group_by(Filename) %>%
# mutate(num = seq(1,length(Filename))) %>%
# ungroup() %>%
# filter(num <= 20)
ai_age1 <- bind_rows(a_age, i_age)
aim_age <- ai_age1 %>%
group_by(age, area, time) %>%
reframe(F1 = mean(F1), F2 = mean(F2)) %>%
ungroup()
ggplot(aim_age, aes(x = F2, y = F1,
color = area
)) +
geom_point(aes(shape = area), size = 4) +
geom_line(aes(linetype = area) , linewidth = .8) +
#geom_smooth(aes(linetype = age, fill = age)) +
theme_minimal() +
labs(x = "Normalized F2", y = "Normalized F1") +
scale_x_reverse() +
scale_y_reverse() +
scale_color_manual(labels = c("North", "South"),
values = c("#329cff","#ff006e")) +
scale_fill_manual(labels = c("North", "South"),
values = c("#329cff","#ff006e")) +
scale_linetype_manual(labels = c("North", "South"),
values=c("solid", "longdash")) +
theme_bw() +
facet_grid(~age) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) +
ggtitle("Lobanov-transformed vowel plot")
#ggsave("f3_age.png", dpi = 600, width = 8, height = 4)
ai_area <- fr_2 %>%
filter(freq != "M") %>%
filter(stress == "S3") %>%
filter(POS == "content") %>%
#filter(freq == "L") %>%
mutate(freq = ifelse(freq == "H", "High", "Low")) %>%
filter(realization == "[aɪ]") %>%
mutate(order3 = ifelse(area == "South", 1, 2)) %>%
group_by(Filename, t) %>%
filter(F1 %in% F1[abs(scale(F1)) <= 3], F2 %in% F2[abs(scale(F2)) <= 3]) %>%
ungroup() %>%
filter(t != 9) %>%
filter(t != 0) %>%
group_by(Filename, age, gender, area, ag) %>%
mutate(F1 = scale(F1), F2 = scale(F2)) %>%
mutate(region = area) %>%
mutate(freq = as.factor(area)) %>%
arrange(area) %>%
mutate(area = fct_reorder(area, order3)) %>%
filter(F1 <= 6) %>%
ungroup()
## Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
## ℹ Please use one dimensional logical vectors instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
ggplot(ai_area, aes(x = F2, y = F1, color = area)) +
#geom_point(aes(shape = stress),alpha = 0.5) +
geom_smooth(aes(linetype = area, fill = area)) +
#stat_ellipse() +
theme_minimal() +
scale_x_reverse() +
scale_y_reverse() +
scale_color_manual(labels = c("North", "South"),
values = c("#329cff", "#fb5607")) +
scale_fill_manual(labels = c("North", "South"),
values = c("#329cff", "#fb5607")) +
scale_linetype_manual(labels = c("North", "South"),
values=c("solid","longdash")) +
facet_grid(gender~age) +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) +
ggtitle("Lobanov-transformed vowel plot")
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
f2_f1 <- ggplot() +
geom_smooth(data = ai_area, aes(x=t, y=F1, linetype = area, group = area,
color = area, fill = area),
size = 2, se = T) +
scale_color_manual(labels = c("North", "South"),
values = c("#ff006e", "#329cff")) +
scale_fill_manual(labels = c("North", "South"),
values = c("#ff006e", "#329cff")) +
scale_linetype_manual(labels = c("North", "South"),
values=c("longdash","solid")) +
labs(x = "", y = "Frequency (z-score)", linetype = "Region",
color = "Region",
fill = "Region") +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
f2_f2 <- ggplot() +
geom_smooth(data = ai_age, aes(x=t, y=F2, linetype = area, group = area,
color = area, fill = area),
size = 2, se = T) +
scale_color_manual(labels = c("North", "South"),
values = c("#329cff", "#fb5607")) +
scale_fill_manual(labels = c("North", "South"),
values = c("#329cff", "#fb5607")) +
scale_linetype_manual(labels = c("North", "South"),
values=c("solid","longdash")) +
labs(x = "", y = "Frequency (z-score)", linetype = "Region",
color = "Region",
#title = "Female",
fill = "Region") +
#facet_grid(~gender)+
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16))
#ggsave("f3_area.png", dpi = 600, width = 8, height = 5)
#tonal realization (stress)
tr <- data3 %>%
select(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, Filename, age, gender, area, freq, realization, stress, label, order, dur, onset, onset1, context, wl, pos, con, POS, n, tone) %>%
mutate(order = ifelse(stress == "3", 4,
ifelse(stress == "2", 3,
ifelse(stress == "1", 2,
1))))
tr1 <- tr %>%
gather("time", "pitch", -age, -gender,
-stress, -area, -Filename, -dur, -realization, -order, -freq, -onset, -onset1,
-context, -wl, -pos, -con, -POS, -label, -n, -tone)
tr1$t <- str_replace(tr1$time, "p", "")
tr2 <- tr1 %>%
filter(stress != 0) %>%
filter(tone != 0) %>%
#filter(pitch != is.na(pitch)) %>%
na.omit() %>%
mutate(ag = paste0(area, age, gender)) %>%
group_by(Filename, age, gender, area, ag) %>%
mutate(mean = mean(pitch), z = scale(pitch)) %>%
ungroup() %>%
mutate(tone = paste0("T", tone)) %>%
mutate(stress = ifelse(stress == 2, "S2", ifelse(stress == 3, "S3", "S1"))) %>%
mutate(stress = as.factor(stress)) %>%
arrange(stress) %>%
mutate(stress = fct_reorder(stress, order))
f_pm <- ggplot() +
geom_smooth(data = tr2,
aes(x=t, y=z, color = stress, linetype = stress, group = stress,
fill = stress), linewidth = 1, se = T) +
scale_color_manual(labels = c("S1", "S2", "S3"),
values=c("#e63946", "#329cff", "#ffbf46")) +
scale_fill_manual(labels = c("S1", "S2", "S3"),
values = c("#e63946", "#329cff", "#ffbf46")) +
scale_linetype_manual(labels = c("S1", "S2", "S3"),
values=c("dashed", "solid", "longdash")) +
labs(x = "Normalized time", y = "Pitch (z-score)", linetype = "Stress", color = "Stress", fill = "Stress") +
facet_grid(~tone)+
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 14),
strip.text.x = element_text(size = 22),
legend.title = element_text(size = 18),
legend.text = element_text(size = 14),
axis.title = element_text(size = 18),
axis.text.y = element_text(size = 14))
f_pm
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#ggsave("f7.png", dpi = 600, width = 10, height = 5)
library(mgcv)
## Warning: package 'mgcv' was built under R version 4.3.1
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:lme4':
##
## lmList
## The following object is masked from 'package:dplyr':
##
## collapse
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
library(plotly)
## Warning: package 'plotly' was built under R version 4.3.1
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(itsadug)
## Loading required package: plotfunctions
##
## Attaching package: 'plotfunctions'
## The following object is masked from 'package:plotly':
##
## add_bars
## The following object is masked from 'package:RVAideMemoire':
##
## se
## The following object is masked from 'package:ggpubr':
##
## get_palette
## The following object is masked from 'package:ggplot2':
##
## alpha
## Loaded package itsadug 2.4 (see 'help("itsadug")' ).
pr1 <- tr2 %>%
mutate(tone = as.factor(tone)) %>%
mutate(area = as.factor(area)) %>%
mutate(age = as.factor(age)) %>%
mutate(Filename= as.factor(Filename)) %>%
filter(tone == "T1") %>%
mutate(stress = ifelse(stress == "S2", "R1", ifelse(stress == "S1", "R2", "R3"))) %>%
mutate(stress = as.factor(stress)) %>%
mutate(t = as.numeric(t))
pr2 <- tr2 %>%
mutate(tone = as.factor(tone)) %>%
mutate(area = as.factor(area)) %>%
mutate(age = as.factor(age)) %>%
mutate(Filename= as.factor(Filename)) %>%
filter(tone == "T2") %>%
mutate(stress = ifelse(stress == "S2", "R1", ifelse(stress == "S1", "R2", "R3"))) %>%
mutate(stress = as.factor(stress)) %>%
mutate(t = as.numeric(t))
pr3 <- tr2 %>%
mutate(tone = as.factor(tone)) %>%
mutate(area = as.factor(area)) %>%
mutate(age = as.factor(age)) %>%
mutate(Filename= as.factor(Filename)) %>%
filter(tone == "T3") %>%
mutate(stress = ifelse(stress == "S2", "R1", ifelse(stress == "S1", "R2", "R3"))) %>%
mutate(stress = as.factor(stress)) %>%
mutate(t = as.numeric(t))
pr4 <- tr2 %>%
mutate(tone = as.factor(tone)) %>%
mutate(area = as.factor(area)) %>%
mutate(age = as.factor(age)) %>%
mutate(Filename= as.factor(Filename)) %>%
filter(tone == "T4") %>%
mutate(stress = ifelse(stress == "S2", "R1", ifelse(stress == "S1", "R2", "R3"))) %>%
mutate(stress = as.factor(stress)) %>%
mutate(t = as.numeric(t))
gam_model1 <- bam(z ~ stress + s(t, by = stress) + s(t, Filename, bs="fs", m=1), data = pr1)
gam_model2 <- bam(z ~ stress + s(t, by = stress) + s(t, Filename, bs="fs", m=1), data = pr2)
gam_model3 <- bam(z ~ stress + s(t, by = stress) + s(t, Filename, bs="fs", m=1), data = pr3)
gam_model4 <- bam(z ~ stress + s(t, by = stress) + s(t, Filename, bs="fs", m=1), data = pr4)
# Plot the GAM
#itsadug::plot_smooth(gam_model1, view="t", plot_all="stress", rm.ranef=TRUE)
dev.new(width = 8, height = 15, unit="in")
par(mar=c(2,2,2,2))
layout(matrix(c(1, 2, 3, 4, 5, 6, 7, 8), nrow = 4,
ncol = 2, byrow = TRUE),
widths=c(1, 1), heights=c(1, 1, 1, 1))
itsadug::plot_diff(gam_model1, comp = list(stress=c("R1","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone1: S2 - S1 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): GYX.
##
## t window(s) of significant difference(s):
## 4.909091 - 9.000000
itsadug::plot_diff(gam_model1, comp = list(stress=c("R3","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone1: S3 - S2 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): GYX.
##
## t window(s) of significant difference(s):
## 0.000000 - 9.000000
itsadug::plot_diff(gam_model2, comp = list(stress=c("R1","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone2: S2 - S1 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): LJS.
##
## t window(s) of significant difference(s):
## 0.000000 - 5.909091
## 6.818182 - 9.000000
itsadug::plot_diff(gam_model2, comp = list(stress=c("R3","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone2: S3 - S2 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): LJS.
##
## t window(s) of significant difference(s):
## 0.000000 - 2.090909
## 3.363636 - 9.000000
itsadug::plot_diff(gam_model3, comp = list(stress=c("R1","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone3: S2 - S1 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): CZX1.
##
## t window(s) of significant difference(s):
## 1.454545 - 8.454545
itsadug::plot_diff(gam_model3, comp = list(stress=c("R3","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone3: S3 - S2 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): CZX1.
##
## t window(s) of significant difference(s):
## 1.090909 - 8.636364
itsadug::plot_diff(gam_model4, comp = list(stress=c("R1","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone4: S2 - S1 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): XMC.
##
## t window(s) of significant difference(s):
## 0.000000 - 3.454545
## 4.272727 - 9.000000
itsadug::plot_diff(gam_model4, comp = list(stress=c("R3","R2")), rm.ranef=F, view="t",
hide.label = TRUE, main = "Tone4: S3 - S2 difference")
## Summary:
## * t : numeric predictor; with 100 values ranging from 0.000000 to 9.000000.
## * Filename : factor; set to the value(s): XMC.
##
## t window(s) of significant difference(s):
## 0.000000 - 3.636364
## 4.545455 - 9.000000
# Summary of the GAM
summary(gam_model1)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## z ~ stress + s(t, by = stress) + s(t, Filename, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.28402 0.04538 6.259 4.12e-10 ***
## stressR2 -0.15046 0.05423 -2.774 0.00555 **
## stressR3 0.90618 0.03515 25.781 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(t):stressR1 3.028 3.764 18.077 < 2e-16 ***
## s(t):stressR2 1.000 1.000 10.185 0.00142 **
## s(t):stressR3 1.630 2.023 3.000 0.05166 .
## s(t,Filename) 27.223 278.000 1.176 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.147 Deviance explained = 15.2%
## fREML = 8775.5 Scale est. = 0.87923 n = 6439
summary(gam_model2)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## z ~ stress + s(t, by = stress) + s(t, Filename, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.39616 0.02010 -19.71 <2e-16 ***
## stressR2 0.12913 0.01110 11.64 <2e-16 ***
## stressR3 0.57419 0.02849 20.15 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(t):stressR1 5.081 6.119 23.139 <2e-16 ***
## s(t):stressR2 2.833 3.475 44.968 <2e-16 ***
## s(t):stressR3 3.609 4.471 38.713 <2e-16 ***
## s(t,Filename) 93.139 278.000 2.942 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0622 Deviance explained = 6.52%
## fREML = 40686 Scale est. = 0.64949 n = 33716
summary(gam_model3)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## z ~ stress + s(t, by = stress) + s(t, Filename, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.61686 0.11783 -5.235 1.79e-07 ***
## stressR2 0.31766 0.08078 3.933 8.64e-05 ***
## stressR3 -0.27278 0.04738 -5.757 9.64e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(t):stressR1 3.510 4.283 14.301 <2e-16 ***
## s(t):stressR2 1.000 1.000 1.777 0.183
## s(t):stressR3 3.703 4.566 10.755 <2e-16 ***
## s(t,Filename) 67.381 272.000 2.022 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.228 Deviance explained = 25.1%
## fREML = 2908.9 Scale est. = 0.54169 n = 2525
summary(gam_model4)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## z ~ stress + s(t, by = stress) + s(t, Filename, bs = "fs", m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.28118 0.01755 16.02 <2e-16 ***
## stressR2 0.03675 0.01612 2.28 0.0226 *
## stressR3 0.01536 0.02647 0.58 0.5617
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(t):stressR1 2.855 3.486 372.268 <2e-16 ***
## s(t):stressR2 1.000 1.000 1.508 0.219
## s(t):stressR3 2.967 3.686 141.601 <2e-16 ***
## s(t,Filename) 69.773 278.000 1.619 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## R-sq.(adj) = 0.0978 Deviance explained = 9.96%
## fREML = 53744 Scale est. = 0.91783 n = 38993
================================================================================= #citation forms
ai_GY <- read_csv("data - read.csv")
## Rows: 1638 Columns: 23
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Filename, Segment_label
## dbl (21): dur, p0_1, p0_2, p1_1, p1_2, p2_1, p2_2, p3_1, p3_2, p4_1, p4_2, p...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#ai_GY <- read_csv("read330.csv")
#ai_GY <- read_csv("data - read2.csv")
ai_GY1 <- ai_GY %>%
#na.omit() %>%
gather("time", "formant", -Filename, -dur, -Segment_label)
##data pre-processing
ai_GY1$t <- str_replace(ai_GY1$time, "_(1|2)", "")
ai_GY1$t <- str_replace(ai_GY1$t, "p", "")
ai_GY1$f <- str_replace(ai_GY1$time, "p(0|1|2|3|4|5|6|7|8|9)_", "")
ai_GY1$lan <- str_replace(ai_GY1$Filename, "^[A-Z]+_[A-Z]+_[a-z]+_", "")
ai_GY1$Filename <- str_replace(ai_GY1$Filename, "_GY", "")
ai_GY1$Filename <- str_replace(ai_GY1$Filename, "_TW", "")
ai_GY1$group <- str_replace(ai_GY1$Filename, "_[A-Z]+_[a-z]+", "")
ai_GY1$Filename <- str_replace(ai_GY1$Filename, "^[SNYOFM]{3}_", "")
ai_GY1$word <- str_replace(ai_GY1$Filename, "[A-Z]+_", "")
ai_GY1$Filename <- str_replace(ai_GY1$Filename, "_[a-z]+", "")
ai_GY1$gender <- str_replace(ai_GY1$group, "[SNYO]{2}", "")
ai_GY1$area <- str_replace(ai_GY1$group, "[YOFM]{2}", "")
ai_GY1$age <- str_replace(ai_GY1$group, "[SN]", "")
ai_GY1$age <- str_replace(ai_GY1$age, "[FM]", "")
ai_GY2 <- ai_GY1 %>%
filter(Filename != "ZMH") %>%
mutate(vowel = "ai")
##contextual influence
ai_word <- read_csv("read_word.csv")
## Rows: 40 Columns: 2
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): word, con
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ai_read <- right_join(ai_GY2, ai_word, by = "word")
fluency <- read_csv("fluency.csv")
## Rows: 40 Columns: 3
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Filename
## dbl (2): TW, CN
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ai_read2 <- right_join(ai_read, fluency, by = "Filename")
ai_read1 <- ai_read2 %>%
filter(Segment_label == "ai") %>%
mutate(proficiency = TW/CN) %>%
mutate(context = ifelse(con %in% c("zh", "ch", "sh", "r", "z", "c", "s", "i", "y", "d", "t", "n", "l", "j", "q", "x"), "cor",
ifelse(con %in% c("b", "p", "m", "f", "w"), "lab",
ifelse(con %in% c("g", "k", "h", "o", "u", "e", "a"), "dor", "else"))))
##language plot
ai_read_f1 <- ai_read1 %>%
mutate(ag = paste0(area, age, gender)) %>%
mutate(age = ifelse(age == "O", "Old", "Young")) %>%
mutate(area = ifelse(area == "N", "North", "South")) %>%
mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
mutate(ra = paste(area, age)) %>%
mutate(lan = ifelse(lan == "TW", "Min", "Mandarin")) %>%
select(-time) %>%
mutate(f = ifelse(f == 1, "F1", "F2")) %>%
filter(formant != is.na(formant)) %>%
spread(f, formant) %>%
filter(t != 0) %>%
#filter(t != 9) %>%
group_by(Filename, t) %>%
filter(F1 %in% F1[abs(scale(F1)) <= 3], F2 %in% F2[abs(scale(F2)) <= 3]) %>%
ungroup() %>%
group_by(Filename, age, gender, area, ag, t) %>%
mutate(F1 = scale(F1), F2 = scale(F2)) %>%
rename(Region = area) %>%
rename(Language = lan) %>%
ungroup()
a <- ai_read_f1 %>%
filter(t %in% c(1,2,3)) %>%
group_by(Filename, word) %>%
mutate(time = 1) %>%
mutate(F1 = max(F1)) %>%
mutate(F2 = min(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
i <- ai_read_f1 %>%
filter(t %in% c(6,7,8,9)) %>%
group_by(Filename, word) %>%
mutate(time = 2) %>%
mutate(F1 = min(F1)) %>%
mutate(F2 = max(F2)) %>%
select(-t) %>%
distinct() %>%
ungroup()
ai1 <- bind_rows(a, i)
aim <- ai1 %>%
group_by(gender, age, Region, Language, time, ra, ag) %>%
reframe(F1 = mean(F1), F2 = mean(F2)) %>%
ungroup() #%>%
#filter(gender == "Female")
ggplot(aim, aes(x = F2, y = F1, color = Language, fill = Language, shape = ra)) +
geom_point(aes(shape = ra, fill = Language), size = 4) +
geom_line(aes(linetype = Language)) +
#geom_smooth(aes(linetype = lan, fill = lan)) +
scale_shape_manual(labels = c("North Old", "North Young", "South Old", "South Young"),
values = c(1, 16, 2, 17)) + # Specify shapes (filled and open)
scale_linetype_manual(labels = c("Mandarin", "Min"),
values = c("dashed", "solid")) +
# scale_fill_manual(labels = c("Mandarin", "Min"),
# values = c("blue", NA)) +
theme_minimal() +
scale_x_reverse() +
scale_y_reverse() +
facet_grid(~gender) +
labs(x = "Normalized F2", y = "Normalized F1") +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
strip.text.y = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
theme(legend.title=element_blank()) +
ggtitle("Lobanov-transformed vowel plot")
#ggsave("f2_lan.png", dpi = 600, width = 8, height = 5)
ai_read_age <- ai_read1 %>%
mutate(ag = paste0(area, age, gender)) %>%
mutate(age = ifelse(age == "O", "Old", "Young")) %>%
mutate(area = ifelse(area == "N", "North", "South")) %>%
mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
mutate(lan = ifelse(lan == "TW", "Min", "Mandarin")) %>%
mutate(group = paste(area, gender)) %>%
select(-time) %>%
mutate(f = ifelse(f == 1, "F1", "F2")) %>%
filter(formant != is.na(formant)) %>%
spread(f, formant) %>%
group_by(Filename, t) %>%
filter(F1 %in% F1[abs(scale(F1)) <= 3], F2 %in% F2[abs(scale(F2)) <= 3]) %>%
ungroup() %>%
filter(t != 0) %>%
filter(t != 9) %>%
group_by(Filename, age, gender, area, ag) %>%
mutate(F1 = scale(F1), F2 = scale(F2)) %>%
ungroup() #%>%
#filter(lan != "Min") %>%
#filter(gender != "Female")
# group_by(age, gender, area, ag, t) %>%
# summarise(mean_F1 = mean(F1), mean_F2 = mean(F2))
age1 <- ggplot(ai_read_age, aes(x = F2, y = F1, color = age)) +
#geom_point(aes(shape = age),alpha = 0.5) +
geom_smooth(aes(linetype = age, fill = age)) +
#stat_ellipse() +
theme_minimal() +
scale_x_reverse() +
scale_y_reverse() +
#facet_grid(~gender)+
scale_color_manual(labels = c("Old", "Young"),
values = c("#02c39a", "#FF3C38")) +
scale_fill_manual(labels = c("Old", "Young"),
values = c("#02c39a", "#FF3C38")) +
theme_bw() +
labs(linetype = "Age", color = "Age", fill = "Age"
) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
strip.text.y = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) #+
#ggtitle("Lobanov-transformed vowel plot")
#ggsave("f2_age.png", dpi = 600, width = 8, height = 5)
age1
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
aip <- ai1 %>%
#mutate(pro = ifelse(proficiency >= mean(proficiency), "High", "Low")) %>%
#mutate(pro = ifelse(proficiency >= 0.6, "High", "Low")) %>%
mutate(pro = ifelse(TW >= 5, "High", "Low")) %>%
group_by(gender, age, Region, Language, time, ra, ag, pro) %>%
reframe(F1 = mean(F1), F2 = mean(F2)) %>%
ungroup() #%>%
#filter(gender == "Female")
ggplot(aip, aes(x = F2, y = F1, color = Language, fill = Language, shape = ra)) +
geom_point(aes(shape = ra, fill = Language), size = 4) +
geom_line(aes(linetype = Language)) +
#geom_smooth(aes(linetype = lan, fill = lan)) +
scale_shape_manual(labels = c("North Old", "North Young", "South Old", "South Young"),
values = c(1, 16, 2, 17)) + # Specify shapes (filled and open)
scale_linetype_manual(labels = c("Mandarin", "Min"),
values = c("dashed", "solid")) +
# scale_fill_manual(labels = c("Mandarin", "Min"),
# values = c("blue", NA)) +
theme_minimal() +
scale_x_reverse() +
scale_y_reverse() +
facet_grid(~pro + gender) +
labs(x = "Normalized F2", y = "Normalized F1") +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
strip.text.y = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
theme(legend.title=element_blank()) +
ggtitle("Lobanov-transformed vowel plot")
#ggsave("f2_lan.png", dpi = 600, width = 8, height = 5)
###GY and TW statistics F1
aif1 <- ai1 %>%
#ai_read1 %>%
#mutate(ag = paste0(area, age, gender)) %>%
# mutate(age = ifelse(age == "O", "Old", "Young")) %>%
# mutate(area = ifelse(area == "N", "North", "South")) %>%
# mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
# mutate(lan = ifelse(lan == "TW", "Min", "Mandarin")) %>%
# # mutate(group = as.factor(group)) %>%
#filter(gender == "Female") %>%
#filter(age == "Old") %>%
#filter(Region == "North") %>%
filter(Language != "Min") %>%
filter(time == 2) %>%
mutate(pro = ifelse(TW >= 5, "High", "Low")) %>%
mutate(Language = as.factor(Language)) %>%
mutate(Filename = as.factor(Filename)) %>%
mutate(context = as.factor(context)) %>%
mutate(age = as.factor(age)) %>%
mutate(Region = as.factor(Region)) %>%
mutate(gender = as.factor(gender))
#model_f <- lmer(F2~area*t + (1 | Filename), data = aif1)
model_f <- lmer(F2~pro*gender*age*Region + (1 | Filename), data = aif1)
## fixed-effect model matrix is rank deficient so dropping 2 columns / coefficients
## boundary (singular) fit: see help('isSingular')
#model_f <- lmer(formant~area*t + (1 | Filename), data = aif1)
coefs <- data.frame(coef(summary(model_f)))
coefs <- coefs %>%
mutate(Estimate = round(Estimate, 4)) %>%
mutate(Std..Error = round(Std..Error, 4)) %>%
mutate(t.value = round(t.value, 4)) %>%
mutate(p.z = round(2*(1-pnorm(abs(coefs$t.value))), 4)) %>%
mutate(p.z = ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.001, "<.001",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.01, "<.01",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.05, "<.05",
round(2*(1-pnorm(abs(coefs$t.value))), 4)))))
coefs
## Estimate Std..Error t.value p.z
## (Intercept) 0.3730 0.1012 3.6860 <.001
## proLow 0.4986 0.2262 2.2039 <.05
## genderMale 0.0816 0.1440 0.5665 0.571
## ageYoung 0.7493 0.2830 2.6473 <.01
## RegionSouth 0.2599 0.1357 1.9143 0.0556
## proLow:genderMale -0.4294 0.3204 -1.3403 0.1802
## proLow:ageYoung -0.8568 0.3595 -2.3832 <.05
## genderMale:ageYoung -0.5627 0.2437 -2.3091 <.05
## proLow:RegionSouth -0.3634 0.4204 -0.8643 0.3874
## genderMale:RegionSouth -0.0540 0.1979 -0.2729 0.7849
## ageYoung:RegionSouth -0.6483 0.2268 -2.8584 <.01
## proLow:genderMale:ageYoung 0.4440 0.4364 1.0174 0.3089
## proLow:genderMale:RegionSouth 0.3980 0.3400 1.1704 0.2418
## proLow:ageYoung:RegionSouth 0.4934 0.4335 1.1382 0.255
#summary(model_f)
#write.csv(coefs, "table10.csv")
ai_read_gender <- ai_read1 %>%
mutate(ag = paste0(area, age, gender)) %>%
mutate(age = ifelse(age == "O", "Old", "Young")) %>%
mutate(area = ifelse(area == "N", "North", "South")) %>%
mutate(gender = ifelse(gender == "F", "Female", "Male")) %>%
mutate(lan = ifelse(lan == "TW", "Min", "Mandarin")) %>%
mutate(group = paste(area, age)) %>%
select(-time) %>%
mutate(f = ifelse(f == 1, "F1", "F2")) %>%
filter(formant != is.na(formant)) %>%
spread(f, formant) %>%
group_by(Filename, t) %>%
filter(F1 %in% F1[abs(scale(F1)) <= 3], F2 %in% F2[abs(scale(F2)) <= 3]) %>%
ungroup() %>%
filter(t != 0) %>%
group_by(Filename, age, gender, area, ag) %>%
mutate(F1 = scale(F1), F2 = scale(F2)) %>%
ungroup()
# group_by(age, gender, area, ag, t) %>%
# summarise(mean_F1 = mean(F1), mean_F2 = mean(F2))
ggplot(ai_read_gender, aes(x = F2, y = F1, color = gender)) +
#geom_point(aes(shape = age),alpha = 0.5) +
geom_smooth(aes(linetype = gender, fill = gender)) +
#stat_ellipse() +
theme_minimal() +
scale_x_reverse() +
scale_y_reverse() +
#facet_grid(lan~group)+
scale_color_manual(labels = c("Female", "Male"),
values = c("#9d4edd", "#329cff")) +
scale_fill_manual(labels = c("Female", "Male"),
values = c("#9d4edd", "#329cff")) +
theme_bw() +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
strip.text.y = element_text(size = 18),
legend.title = element_text(size = 12),
legend.text = element_text(size = 12),
axis.title = element_text(size = 18),
panel.background=element_rect(colour="black", size = 1),
strip.background = element_rect(color = "black", size = 0.8),
#legend.position = c(0.93, 0.4),
legend.background = element_rect(fill="white",
size=0.3, linetype="solid",
colour ="black"),
axis.text.y = element_text(size = 16)) +
scale_shape_manual(values = 1:10) +
ggtitle("Lobanov-transformed vowel plot")
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
#ggsave("f2_gender.png", dpi = 600, width = 10, height = 6)
ai_readm <- ai_read1 %>%
filter(lan == "TW") %>%
filter(gender == "M") %>%
mutate(lans = "Male TSM") %>%
mutate(pro = ifelse(proficiency >= mean(proficiency), "High", "Low")) %>%
group_by(Filename) %>%
mutate(m = mean(formant), F1 = scale(formant)) %>%
filter(f == "1")
ai_readf <- ai_read1 %>%
filter(lan == "GY") %>%
filter(gender == "F") %>%
mutate(lans = "Female TM") %>%
mutate(pro = ifelse(proficiency >= mean(proficiency), "High", "Low")) %>%
group_by(Filename) %>%
mutate(m = mean(formant), F1 = scale(formant)) %>%
filter(f == "1")
f111 <- bind_rows(ai_readm, ai_readf)
ai_readff <- ai_read1 %>%
filter(lan == "GY") %>%
filter(gender == "F") %>%
mutate(lans = "Male TSM") %>%
mutate(pro = ifelse(proficiency >= mean(proficiency), "High", "Low")) %>%
group_by(Filename) %>%
mutate(m = mean(formant), F2 = scale(formant)) %>%
filter(f == "2")
ai_readmm <- ai_read1 %>%
filter(lan == "TW") %>%
filter(gender == "M") %>%
mutate(lans = "Female TM") %>%
mutate(pro = ifelse(proficiency >= mean(proficiency), "High", "Low")) %>%
group_by(Filename) %>%
mutate(m = mean(formant), F2 = scale(formant)) %>%
filter(f == "2")
f222 <- bind_rows(ai_readmm, ai_readff)
ai_read_fp <- ggplot() +
geom_smooth(data = f111, aes(x = t, y = F1, group = pro, fill = pro, linetype = pro, color = pro), size = 2, se = T) +
geom_smooth(data = f222, aes(x = t, y = F2, group = pro, fill = pro, linetype = pro, color = pro), size = 2, se = T) +
labs(x = "Time", y = "z-score", linetype = "Proficiency",
fill = "Proficiency", color = "Proficiency") +
scale_color_manual(labels = c("High", "Low"),
values = c("#F78914", "#B6EE56")) +
scale_fill_manual(labels = c("High", "Low"),
values = c("#F78914", "#B6EE56")) +
theme_bw() +
facet_grid(~lans) +
theme(plot.title = element_text(size = 18, hjust = 0.5, vjust = 0.5),
axis.text.x = element_text(size = 18),
strip.text.x = element_text(size = 18),
legend.title = element_text(size = 18),
legend.text = element_text(size = 14),
axis.title = element_text(size = 18),
axis.text.y = element_text(size = 16))
ai_read_fp
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
## Warning: Removed 2 rows containing non-finite values (`stat_smooth()`).
#ggsave("read_pro.png", dpi = 600, width = 8, height = 5)
aif1 <- ai_read1 %>%
filter(gender == "F") %>%
mutate(lan = as.factor(lan)) %>%
filter(age == "O") %>%
filter(area == "S") %>%
filter(lan == "TW") %>%
mutate(t = as.numeric(t)) %>%
filter(f == "1")
model_f <- lmer(formant~proficiency*t + (1 | Filename), data = aif1)
coefs <- data.frame(coef(summary(model_f)))
coefs <- coefs %>%
mutate(Estimate = round(Estimate, 4)) %>%
mutate(Std..Error = round(Std..Error, 4)) %>%
mutate(t.value = round(t.value, 4)) %>%
mutate(p.z = round(2*(1-pnorm(abs(coefs$t.value))), 4)) %>%
mutate(p.z = ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.001, "***",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.01, "**",
ifelse(2*(1-pnorm(abs(coefs$t.value))) < 0.05, "*",
round(2*(1-pnorm(abs(coefs$t.value))), 4)))))
coefs
## Estimate Std..Error t.value p.z
## (Intercept) 919.2616 222.2079 4.1369 ***
## proficiency -21.1037 225.1560 -0.0937 0.9253
## t -14.6667 15.4160 -0.9514 0.3414
## proficiency:t -21.3905 15.6107 -1.3702 0.1706